Skip to content

Commit 2bdd52a

Browse files
build: replace rockylinux with chainguard/wolfi as a base image (#423)
### Summary Updates the Dockerfile to use the Chainguard wolfi-base image to reduce CVEs. Also adds a step in the docker publish job that scans the images and checks for CVEs before publishing. ### Testing Run `make docker-build` and `make docker-start-api`, then try: ``` from unstructured.partition.api import partition_via_api elements = partition_via_api( filename=filename, api_url="http://localhost:8000/general/v0/general", api_key="<API-KEY>", strategy="hi_res", ) print("\n\n".join([str(el) for el in elements])) ```
1 parent fbdc6af commit 2bdd52a

File tree

8 files changed

+67
-12
lines changed

8 files changed

+67
-12
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,9 @@ jobs:
112112
source .venv/bin/activate
113113
make docker-build
114114
make docker-test
115+
- name: Scan image
116+
uses: anchore/scan-action@v3
117+
with:
118+
image: "pipeline-family-${{ env.PIPELINE_FAMILY }}-dev"
119+
# NOTE(robinson) - revert this to medium when we bump libreoffice
120+
severity-cutoff: high

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.0.71-dev0
2+
3+
* Replace rockylinux with chainguard/wolfi as a base image for `amd64`
4+
15
## 0.0.70
26

37
* Bump to `unstructured` 0.14.6

Dockerfile-amd64

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# syntax=docker/dockerfile:1
2+
# Base stage: wolfi-based image pinned by digest so rebuilds resolve to the same base.
FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:6c00a236c648ffdaf196ccbc446f5c6cc9eb4e3ab9e437178abcfac710b2b373 AS base
3+
4+
# NOTE(crag): NB_USER ARG for mybinder.org compat:
5+
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
6+
ARG NB_USER=notebook-user
7+
ARG NB_UID=1000
8+
ARG PIP_VERSION
9+
ARG PIPELINE_PACKAGE
10+
ARG PYTHON_VERSION="3.11"
11+
12+
# Set up environment
13+
# Interpreter name derived from the PYTHON_VERSION build arg (key=value form;
# the space-separated ENV syntax is legacy).
ENV PYTHON=python${PYTHON_VERSION}
14+
# Command prefix used by later RUN steps to invoke pip through the selected interpreter.
# Quoted because the value contains spaces and key=value form splits on whitespace.
ENV PIP="${PYTHON} -m pip"
15+
16+
WORKDIR ${HOME}
17+
USER ${NB_USER}
18+
19+
# Append HOME to PYTHONPATH. The ':' separator is only emitted when PYTHONPATH
# already has a value, so an unset PYTHONPATH does not produce a leading empty entry.
# NOTE(review): assumes HOME is provided by the base image — confirm.
ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOME}"
20+
ENV PATH="/home/${NB_USER}/.local/bin:${PATH}"
21+
22+
# Stage: install the pinned pip and the Python requirements on top of the base stage.
FROM base AS python-deps
23+
COPY --chown=${NB_USER}:${NB_USER} requirements/base.txt requirements-base.txt
24+
# Pin pip to the PIP_VERSION build arg; --no-cache-dir keeps pip's download cache out of the layer.
RUN ${PIP} install --no-cache-dir pip==${PIP_VERSION}
25+
# Install base requirements; use the full --no-cache-dir option name rather than an abbreviation.
RUN ${PIP} install --no-cache-dir -r requirements-base.txt
26+
27+
# Stage: download NLTK data and run unstructured's model initialization at build time.
FROM python-deps AS model-deps
28+
RUN ${PYTHON} -c "import nltk; nltk.download('punkt')" && \
29+
${PYTHON} -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
30+
${PYTHON} -c "from unstructured.partition.model_init import initialize; initialize()"
31+
32+
# Stage: copy the application package, notebooks, config and startup script into the image.
FROM model-deps AS code
33+
COPY --chown=${NB_USER}:${NB_USER} CHANGELOG.md CHANGELOG.md
34+
COPY --chown=${NB_USER}:${NB_USER} logger_config.yaml logger_config.yaml
35+
COPY --chown=${NB_USER}:${NB_USER} prepline_${PIPELINE_PACKAGE}/ prepline_${PIPELINE_PACKAGE}/
36+
COPY --chown=${NB_USER}:${NB_USER} exploration-notebooks exploration-notebooks
37+
COPY --chown=${NB_USER}:${NB_USER} scripts/app-start.sh scripts/app-start.sh
38+
39+
ENTRYPOINT ["scripts/app-start.sh"]
40+
# Expose a default port of 8000. Note: The EXPOSE instruction does not actually publish the port,
41+
# but some tooling will inspect containers and perform work contingent on networking support declared.
42+
EXPOSE 8000
File renamed without changes.

prepline_general/api/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
app = FastAPI(
1414
title="Unstructured Pipeline API",
1515
summary="Partition documents with the Unstructured library",
16-
version="0.0.70",
16+
version="0.0.71",
1717
docs_url="/general/docs",
1818
openapi_url="/general/openapi.json",
1919
servers=[

prepline_general/api/general.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ def return_content_type(filename: str):
713713

714714

715715
@router.get("/general/v0/general", include_in_schema=False)
716-
@router.get("/general/v0.0.70/general", include_in_schema=False)
716+
@router.get("/general/v0.0.71/general", include_in_schema=False)
717717
async def handle_invalid_get_request():
718718
raise HTTPException(
719719
status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported."
@@ -728,7 +728,7 @@ async def handle_invalid_get_request():
728728
description="Description",
729729
operation_id="partition_parameters",
730730
)
731-
@router.post("/general/v0.0.70/general", include_in_schema=False)
731+
@router.post("/general/v0.0.71/general", include_in_schema=False)
732732
def general_partition(
733733
request: Request,
734734
# cannot use annotated type here because of a bug described here:

preprocessing-pipeline-family.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
name: general
2-
version: 0.0.70
2+
version: 0.0.71

scripts/docker-build.sh

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@ DOCKER_IMAGE="${DOCKER_IMAGE:-pipeline-family-${PIPELINE_FAMILY}-dev}"
99
DOCKER_PLATFORM="${DOCKER_PLATFORM:-}"
1010

1111

12-
DOCKER_BUILD_CMD=(docker buildx build --load -f Dockerfile \
13-
--build-arg PIP_VERSION="$PIP_VERSION" \
14-
--build-arg BUILDKIT_INLINE_CACHE=1 \
15-
--build-arg PIPELINE_PACKAGE="$PIPELINE_PACKAGE" \
16-
--progress plain \
17-
--target code \
18-
--cache-from "$DOCKER_REPOSITORY":latest \
19-
-t "$DOCKER_IMAGE" .)
12+
DOCKER_BUILD_CMD=(
13+
docker buildx build --load -f Dockerfile-amd64
14+
--build-arg PIP_VERSION="$PIP_VERSION"
15+
--build-arg BUILDKIT_INLINE_CACHE=1
16+
--build-arg PIPELINE_PACKAGE="$PIPELINE_PACKAGE"
17+
--progress plain
18+
--platform linux/amd64
19+
--cache-from "$DOCKER_REPOSITORY:latest"
20+
-t "$DOCKER_IMAGE"
21+
.
22+
)
2023

2124
# only build for specific platform if DOCKER_PLATFORM is set
2225
if [ -n "${DOCKER_PLATFORM:-}" ]; then

0 commit comments

Comments
 (0)