-
Notifications
You must be signed in to change notification settings - Fork 169
Closed
Description
I'm using the latest Docker image along with the latest JS client to partition a PDF and extract images.
When I include extractImageBlockTypes: ['Image']
in the partition parameters, the whole partitioning request fails with the following error in the logs:
2024-06-23 13:50:11,086 127.0.0.1:60040 POST /general/v0/general HTTP/1.1 - 500 Internal Server Error
2024-06-23 13:50:11,087 uvicorn.error ERROR Exception in ASGI application
Traceback (most recent call last):
File "/home/notebook-user/.local/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 399, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/applications.py", line 123, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/routing.py", line 756, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/routing.py", line 72, in app
response = await func(request)
^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/fastapi/routing.py", line 193, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/starlette/concurrency.py", line 42, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/prepline_general/api/general.py", line 850, in general_partition
list(response_generator(is_multipart=False))[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/prepline_general/api/general.py", line 785, in response_generator
response = pipeline_api(
^^^^^^^^^^^^^
File "/home/notebook-user/prepline_general/api/general.py", line 440, in pipeline_api
elements = partition_pdf_splits(
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/prepline_general/api/general.py", line 220, in partition_pdf_splits
return partition(
^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/auto.py", line 426, in partition
elements = _partition_pdf(
^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/documents/elements.py", line 593, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/file_utils/filetype.py", line 626, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 192, in partition_pdf
return partition_pdf_or_image(
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 288, in partition_pdf_or_image
elements = _partition_pdf_or_image_local(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/utils.py", line 249, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf.py", line 676, in _partition_pdf_or_image_local
save_elements(
File "/home/notebook-user/.local/lib/python3.11/site-packages/unstructured/partition/pdf_image/pdf_image_utils.py", line 195, in save_elements
image_path = image_paths[page_number - 1]
~~~~~~~~~~~^^^^^^^^^^^^^^^^^
IndexError: list index out of range
The client call looks like this:
const { elements } = await client.general.partition({
partitionParameters: {
files: {
fileName: filename,
content: data,
},
strategy: Strategy.Auto,
skipInferTableTypes: ['jpg+png'],
extractImageBlockTypes: ['Image'],
},
});
Metadata
Metadata
Assignees
Labels
No labels