Skip to content

feat/add batch file data support #32

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.0.14

* **Add support for batch file data**

## 0.0.13

* **Conform to PEP-625 compliance for project naming**
Expand Down
76 changes: 37 additions & 39 deletions requirements/cli.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile ./cli.in
#
# This file was autogenerated by uv via the following command:
# uv pip compile ./cli.in --output-file ./cli.txt --no-strip-extras --python-version 3.10
annotated-types==0.7.0
# via pydantic
anyio==4.6.0
anyio==4.7.0
# via starlette
asgiref==3.8.1
# via opentelemetry-instrumentation-asgi
Expand All @@ -17,95 +13,100 @@ click==8.1.7
# uvicorn
dataclasses-json==0.6.7
# via unstructured-ingest
deprecated==1.2.14
deprecated==1.2.15
# via
# opentelemetry-api
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-semantic-conventions
exceptiongroup==1.2.2
# via anyio
fastapi==0.115.0
fastapi==0.115.6
# via -r ./cli.in
googleapis-common-protos==1.65.0
googleapis-common-protos==1.66.0
# via opentelemetry-exporter-otlp-proto-grpc
grpcio==1.66.1
grpcio==1.68.1
# via opentelemetry-exporter-otlp-proto-grpc
h11==0.14.0
# via uvicorn
idna==3.10
# via anyio
importlib-metadata==8.4.0
importlib-metadata==8.5.0
# via opentelemetry-api
marshmallow==3.22.0
marshmallow==3.23.1
# via dataclasses-json
mypy-extensions==1.0.0
# via typing-inspect
numpy==2.1.1
ndjson==0.3.1
# via unstructured-ingest
numpy==2.2.0
# via pandas
opentelemetry-api==1.27.0
opentelemetry-api==1.29.0
# via
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-instrumentation
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-common==1.29.0
# via opentelemetry-exporter-otlp-proto-grpc
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.29.0
# via -r ./cli.in
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation==0.50b0
# via
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-asgi==0.48b0
opentelemetry-instrumentation-asgi==0.50b0
# via opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-instrumentation-fastapi==0.50b0
# via -r ./cli.in
opentelemetry-proto==1.27.0
opentelemetry-proto==1.29.0
# via
# opentelemetry-exporter-otlp-proto-common
# opentelemetry-exporter-otlp-proto-grpc
opentelemetry-sdk==1.27.0
opentelemetry-sdk==1.29.0
# via
# opentelemetry-exporter-otlp-proto-grpc
# unstructured-ingest
opentelemetry-semantic-conventions==0.48b0
opentelemetry-semantic-conventions==0.50b0
# via
# opentelemetry-instrumentation
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
# opentelemetry-sdk
opentelemetry-util-http==0.48b0
opentelemetry-util-http==0.50b0
# via
# opentelemetry-instrumentation-asgi
# opentelemetry-instrumentation-fastapi
packaging==24.1
# via marshmallow
packaging==24.2
# via
# marshmallow
# opentelemetry-instrumentation
pandas==2.2.3
# via unstructured-ingest
protobuf==4.25.5
protobuf==5.29.1
# via
# googleapis-common-protos
# opentelemetry-proto
pydantic==2.9.2
pydantic==2.10.3
# via
# fastapi
# unstructured-ingest
pydantic-core==2.23.4
pydantic-core==2.27.1
# via pydantic
python-dateutil==2.9.0.post0
# via
# pandas
# unstructured-ingest
pytz==2024.2
# via pandas
six==1.16.0
six==1.17.0
# via python-dateutil
sniffio==1.3.1
# via anyio
starlette==0.38.6
starlette==0.41.3
# via fastapi
tqdm==4.66.5
tqdm==4.67.1
# via unstructured-ingest
typing-extensions==4.12.2
# via
Expand All @@ -121,16 +122,13 @@ typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.2
# via pandas
unstructured-ingest==0.0.18
unstructured-ingest==0.3.10
# via -r ./cli.in
uvicorn==0.30.6
uvicorn==0.34.0
# via -r ./cli.in
wrapt==1.16.0
wrapt==1.17.0
# via
# deprecated
# opentelemetry-instrumentation
zipp==3.20.2
zipp==3.21.0
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools
2 changes: 1 addition & 1 deletion requirements/constraints.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
unstructured-ingest>=0.0.18
unstructured-ingest>=0.3.10
18 changes: 7 additions & 11 deletions requirements/lint.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile ./lint.in
#
# This file was autogenerated by uv via the following command:
# uv pip compile ./lint.in --output-file ./lint.txt --no-strip-extras --python-version 3.10
autoflake==2.3.1
# via -r ./lint.in
black==24.8.0
black==24.10.0
# via -r ./lint.in
click==8.1.7
# via black
Expand All @@ -18,13 +14,13 @@ flake8-print==5.0.0
# via -r ./lint.in
mccabe==0.7.0
# via flake8
mypy==1.11.2
mypy==1.13.0
# via -r ./lint.in
mypy-extensions==1.0.0
# via
# black
# mypy
packaging==24.1
packaging==24.2
# via black
pathspec==0.12.1
# via black
Expand All @@ -38,9 +34,9 @@ pyflakes==3.2.0
# via
# autoflake
# flake8
ruff==0.6.7
ruff==0.8.3
# via -r ./lint.in
tomli==2.0.1
tomli==2.2.1
# via
# autoflake
# black
Expand Down
36 changes: 17 additions & 19 deletions requirements/release.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,24 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile ./release.in
#
# This file was autogenerated by uv via the following command:
# uv pip compile ./release.in --output-file ./release.txt --no-strip-extras --python-version 3.10
backports-tarfile==1.2.0
# via jaraco-context
certifi==2024.8.30
certifi==2024.12.14
# via requests
charset-normalizer==3.3.2
charset-normalizer==3.4.0
# via requests
docutils==0.21.2
# via readme-renderer
idna==3.10
# via requests
importlib-metadata==8.5.0
# via
# keyring
# twine
# via keyring
jaraco-classes==3.4.0
# via keyring
jaraco-context==6.0.1
# via keyring
jaraco-functools==4.0.2
jaraco-functools==4.1.0
# via keyring
keyring==25.4.1
keyring==25.5.0
# via twine
markdown-it-py==3.0.0
# via rich
Expand All @@ -34,9 +28,11 @@ more-itertools==10.5.0
# via
# jaraco-classes
# jaraco-functools
nh3==0.2.18
nh3==0.2.20
# via readme-renderer
pkginfo==1.10.0
packaging==24.2
# via twine
pkginfo==1.12.0
# via twine
pygments==2.18.0
# via
Expand All @@ -52,15 +48,17 @@ requests-toolbelt==1.0.0
# via twine
rfc3986==2.0.0
# via twine
rich==13.8.1
rich==13.9.4
# via twine
twine==5.1.1
twine==6.0.1
# via -r ./release.in
typing-extensions==4.12.2
# via rich
urllib3==2.2.3
# via
# requests
# twine
wheel==0.44.0
wheel==0.45.1
# via -r ./release.in
zipp==3.20.2
zipp==3.21.0
# via importlib-metadata
14 changes: 5 additions & 9 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile ./test.in
#
# This file was autogenerated by uv via the following command:
# uv pip compile ./test.in --output-file ./test.txt --no-strip-extras --python-version 3.10
exceptiongroup==1.2.2
# via pytest
iniconfig==2.0.0
# via pytest
packaging==24.1
packaging==24.2
# via pytest
pluggy==1.5.0
# via pytest
pytest==8.3.3
pytest==8.3.4
# via -r ./test.in
tomli==2.0.1
tomli==2.2.1
# via pytest
12 changes: 4 additions & 8 deletions requirements/validate.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile ./validate.in
#
certifi==2024.8.30
# This file was autogenerated by uv via the following command:
# uv pip compile ./validate.in --output-file ./validate.txt --no-strip-extras --python-version 3.10
certifi==2024.12.14
# via requests
charset-normalizer==3.3.2
charset-normalizer==3.4.0
# via requests
click==8.1.7
# via -r ./validate.in
Expand Down
32 changes: 14 additions & 18 deletions scripts/pip-compile.sh
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
#!/usr/bin/env bash

pushd ./requirements || exit

find . -type f -name "*.txt" ! -name "constraints.txt" -exec rm '{}' ';'
find . -type f -name "*.in" -maxdepth 1 -exec pip-compile --upgrade '{}' ';'

popd || exit
set -e

# Check python version
# python version must match lowest supported (3.10)
major=3
minor=10
python_version=${UV_PYTHON_VERSION:-"3.10"}

versions=$(cat requirements/* | grep "This file is autogenerated by pip-compile with Python" | awk '{print $NF}' | sort | uniq)
if [[ $(echo $versions | wc -w) -ne 1 ]]; then
echo "Files generated with multiple python version: $versions"
# if major and minor python version (x.y) is not equal to current python_version, error out
if [[ $(python --version | cut -d ' ' -f 2 | cut -d '.' -f 1-2) != $(echo "$python_version" | cut -d '.' -f 1-2) ]]; then
echo "Python version must be $python_version (lowest supported) to be able to pip-compile."
exit 1
fi

version_major=$(echo $versions | awk -F"." '{print $1}')
version_minor=$(echo $versions | awk -F"." '{print $2}')
pushd ./requirements || exit

if [[ $major -ne $version_major || $minor -ne $version_minor ]]; then
echo "python version not equal to expected $major.$minor: $versions"
exit 1
fi
find . -type f -name "*.txt" ! -name "constraints.txt" -exec rm '{}' ';'
find . -type f -name "*.in" -print0 | while read -r -d $'\0' in_file; do
echo "compiling $in_file"
# remove .in extension and add .txt extension
txt_file="${in_file%.in}.txt"
uv pip compile --upgrade "$in_file" --output-file "$txt_file" --no-strip-extras --python-version "$python_version"
done
popd || exit
Loading
Loading