Skip to content

Commit 0165156

Browse files
committed
turbodbc: Update and improve OCI/Dockerfiles. Add simdutf.
1 parent d7bf374 commit 0165156

File tree

8 files changed

+296
-72
lines changed

8 files changed

+296
-72
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Using CrateDB with turbodbc (OCI)
2+
3+
The [dockerfiles](./dockerfiles) folder includes Dockerfiles that exercise
4+
installing the turbodbc driver. They are handy if you can't install it
5+
on your machine. Follow the instructions below on how to build OCI images and how
6+
to invoke the `demo.py` program using them.
7+
8+
## Build
9+
10+
Make the upcoming build more verbose.
11+
```shell
12+
export BUILDKIT_PROGRESS=plain
13+
export COMPOSE_DOCKER_CLI_BUILD=1
14+
export DOCKER_BUILDKIT=1
15+
```
16+
17+
Build images.
18+
```shell
19+
docker build --tag local/turbodbc-demo-archlinux --file=dockerfiles/archlinux.Dockerfile .
20+
docker build --tag local/turbodbc-demo-centos --file=dockerfiles/centos.Dockerfile .
21+
docker build --tag local/turbodbc-demo-debian --file=dockerfiles/debian.Dockerfile .
22+
docker build --tag local/turbodbc-demo-opensuse --file=dockerfiles/opensuse.Dockerfile .
23+
```
24+
25+
Invoke demo program.
26+
```shell
27+
for OS in archlinux centos debian opensuse; do
28+
docker run --rm -it --volume="$(pwd):/src" --network=host \
29+
local/turbodbc-demo-${OS} python3 /src/demo.py
30+
done
31+
```
Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,68 @@
1-
# ---------------------------
2-
# Setup archlinux environment
3-
# ---------------------------
1+
# Set up Arch Linux.
2+
FROM archlinux:base AS build
43

5-
# Include `yay` for easily installing AUR packages.
6-
7-
FROM archlinux:base-20230205.0.123931 as archlinux-build
8-
9-
# Allow building packages using `makepkg` within Docker container.
4+
# Permit building packages using `makepkg` or `yay` within Docker container.
105
# https://blog.ganssle.io/tag/arch-linux.html
11-
RUN pacman -Sy --noconfirm --needed base-devel binutils fakeroot git sudo
6+
RUN pacman -Sy --noconfirm --needed base-devel bc binutils fakeroot git sudo
127
RUN useradd --create-home build
138
RUN echo 'build ALL=NOPASSWD: ALL' >> /etc/sudoers
149

15-
# Install AUR package helper program `yay`.
10+
# Install `yay` for easily installing AUR packages.
1611
# https://aur.archlinux.org/packages/yay
1712
RUN mkdir /yay-bin && chmod ugo+rwX /yay-bin
18-
USER build
19-
RUN \
20-
git clone https://aur.archlinux.org/yay-bin.git && \
21-
cd yay-bin && \
22-
makepkg -si --noconfirm
23-
USER root
24-
13+
RUN true \
14+
&& git clone https://aur.archlinux.org/yay-bin.git \
15+
&& cd yay-bin \
16+
&& sudo --user=build -- makepkg -si --noconfirm
2517

26-
# --------------------------
27-
# Setup turbodbc environment
28-
# --------------------------
18+
RUN pacman -Sy --noconfirm --needed \
19+
boost pybind11 python-pip python-setuptools python-wheel uv
2920

30-
# Install Python, unixODBC, PostgreSQL ODBC driver, and turbodbc.
3121

32-
FROM archlinux-build
22+
# Set up ODBC.
23+
FROM build AS odbc
3324

34-
# Install unixODBC.
25+
# Install unixODBC and the PostgreSQL ODBC driver.
3526
# https://archlinux.org/packages/core/x86_64/unixodbc/
36-
RUN pacman -Sy --noconfirm --needed unixodbc
37-
38-
# Install PostgreSQL ODBC driver.
3927
# https://aur.archlinux.org/packages/psqlodbc
40-
USER build
41-
RUN yay -S --noconfirm psqlodbc
42-
USER root
28+
RUN pacman -Sy --noconfirm --needed \
29+
unixodbc
30+
RUN sudo --user=build -- yay -S --noconfirm --needed \
31+
psqlodbc
32+
33+
34+
# Apache Arrow and simdutf.
35+
FROM odbc AS arrow-simdutf
36+
37+
# Install Apache Arrow, NumPy, PyArrow.
38+
RUN pacman -Sy --noconfirm --needed \
39+
arrow python-numpy python-pyarrow
4340

44-
# Install NumPy, PyArrow, and turbodbc.
45-
RUN pacman -Sy --noconfirm --needed boost python python-pip python-setuptools
41+
# Install ultra-fast Unicode routines.
42+
# https://github.com/simdutf/simdutf
43+
RUN sudo --user=build -- yay -S --noconfirm --needed \
44+
simdutf-git
45+
46+
47+
# Set up turbodbc.
48+
FROM arrow-simdutf AS turbodbc
49+
50+
# Configure Python environment.
51+
ENV PIP_ROOT_USER_ACTION=ignore
52+
ENV PIP_BREAK_SYSTEM_PACKAGES=true
53+
ENV UV_BREAK_SYSTEM_PACKAGES=true
54+
ENV UV_COMPILE_BYTECODE=true
55+
ENV UV_LINK_MODE=copy
56+
ENV UV_PYTHON_DOWNLOADS=never
57+
ENV UV_SYSTEM_PYTHON=true
58+
59+
# Install turbodbc.
4660
# Plain local files need no ADD semantics; a wildcard source wants a directory destination.
COPY requirements*.txt ./
47-
RUN pip install --upgrade --requirement=requirements-prereq.txt
48-
RUN MAKEFLAGS="-j$(nproc)" pip install --upgrade --requirement=requirements.txt --verbose
61+
RUN \
62+
--mount=type=cache,id=user-cache,target=/root/.cache \
63+
true \
64+
# Use half of the CPUs, rounded up so the job count is never zero. POSIX shell
# arithmetic replaces `bc -e`: `-e` is not a GNU bc option, and `export` masks that failure.
&& export WORKERS=$(( ($(nproc) + 1) / 2 )) \
65+
&& MAKEFLAGS="-j${WORKERS}" uv pip install --upgrade --requirement=requirements.txt --verbose
66+
67+
68+
FROM turbodbc
Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,76 @@
1-
FROM quay.io/centos/centos:stream9
1+
# Set up Red Hat / CentOS / Alma Linux.
2+
FROM quay.io/centos/centos:stream9 AS build
3+
#FROM quay.io/centos/centos:stream10 AS build
4+
#FROM almalinux:9.5 AS build
25

3-
# Install Python, unixODBC, the PostgreSQL ODBC driver, and development libraries.
4-
RUN dnf install --enablerepo=crb -y boost-devel g++ postgresql-odbc python3 python3-devel python3-pip unixODBC-devel
6+
# Provide additional package repositories.
7+
# The build already runs as root, so the fallbacks must not depend on `sudo`, which this stage never installs.
RUN dnf install -y epel-release || dnf install -y oracle-epel-release-el$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1) || dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1).noarch.rpm
8+
RUN dnf config-manager --set-enabled epel || :
9+
RUN dnf config-manager --set-enabled powertools || :
10+
RUN dnf config-manager --set-enabled crb || :
11+
RUN dnf config-manager --set-enabled ol$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)_codeready_builder || :
12+
RUN dnf config-manager --set-enabled codeready-builder-for-rhel-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)-rhui-rpms || :
13+
RUN subscription-manager repos --enable codeready-builder-for-rhel-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)-$(arch)-rpms || :
514

6-
# Install Python, NumPy, PyArrow, and turbodbc.
15+
# Install recent Python and development libraries.
16+
RUN dnf install -y \
17+
bc boost-devel g++ git python3.12-devel python3.12-pip python3.12-pybind11
18+
19+
# Make recent Python version the default.
20+
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 0
21+
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 0
22+
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3.12 0
23+
RUN update-alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.12 0
24+
25+
26+
# Set up ODBC.
27+
FROM build AS odbc
28+
29+
# Install unixODBC and the PostgreSQL ODBC driver.
30+
RUN dnf install -y \
31+
postgresql-odbc unixODBC-devel
32+
33+
34+
# Apache Arrow and simdutf.
35+
FROM odbc AS arrow-simdutf
36+
37+
# Install Apache Arrow.
38+
# https://arrow.apache.org/install/
39+
RUN dnf install -y https://apache.jfrog.io/artifactory/arrow/almalinux/$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)/apache-arrow-release-latest.rpm
40+
RUN dnf install -y \
41+
arrow-devel arrow-dataset-devel arrow-flight-devel arrow-flight-sql-devel parquet-devel
42+
43+
# Install ultra-fast Unicode routines.
44+
# https://github.com/simdutf/simdutf
45+
RUN true \
46+
&& git clone https://github.com/simdutf/simdutf --branch=v6.4.0 \
47+
&& cd simdutf \
48+
&& cmake -B build -DCMAKE_CXX_FLAGS=-Werror -DBUILD_SHARED_LIBS=ON \
49+
&& cmake --build build \
50+
&& cmake --install build
51+
52+
53+
# Set up turbodbc.
54+
FROM arrow-simdutf AS turbodbc
55+
56+
# Configure Python environment.
57+
ENV PIP_ROOT_USER_ACTION=ignore
58+
ENV PIP_BREAK_SYSTEM_PACKAGES=true
59+
ENV UV_BREAK_SYSTEM_PACKAGES=true
60+
ENV UV_COMPILE_BYTECODE=true
61+
ENV UV_LINK_MODE=copy
62+
ENV UV_PYTHON_DOWNLOADS=never
63+
ENV UV_SYSTEM_PYTHON=true
64+
65+
# Install NumPy, PyArrow, and turbodbc.
766
# Plain local files need no ADD semantics; a wildcard source wants a directory destination.
COPY requirements*.txt ./
8-
RUN pip install --upgrade --requirement=requirements-prereq.txt
9-
RUN MAKEFLAGS="-j$(nproc)" pip install --upgrade --requirement=requirements.txt --verbose
67+
RUN \
68+
--mount=type=cache,id=user-cache,target=/root/.cache \
69+
true \
70+
&& pip install uv \
71+
&& uv pip install --upgrade --requirement=requirements-prereq.txt --verbose \
72+
# Use half of the CPUs, rounded up so the job count is never zero. POSIX shell
# arithmetic replaces `bc -e`: `-e` is not a GNU bc option, and `export` masks that failure.
&& export WORKERS=$(( ($(nproc) + 1) / 2 )) \
73+
&& MAKEFLAGS="-j${WORKERS}" uv pip install --upgrade --requirement=requirements.txt --verbose
74+
75+
76+
FROM turbodbc
Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,70 @@
1-
FROM python:3.11-slim-bullseye
1+
# Set up Debian Linux.
2+
FROM debian:bookworm-slim AS build
23

4+
# Configure system environment.
35
ENV DEBIAN_FRONTEND=noninteractive
46

5-
# Install prerequisites.
7+
# Install Python, build tooling, and development libraries.
68
RUN apt-get update
7-
RUN apt-get install --yes build-essential libboost-dev odbc-postgresql unixodbc-dev
9+
RUN apt-get install --yes \
10+
bc build-essential cmake git libboost-dev pkg-config \
11+
python3-pip python3-pybind11
12+
13+
14+
# Set up ODBC.
15+
FROM build AS odbc
16+
17+
# Install unixODBC and the PostgreSQL ODBC driver.
18+
RUN apt-get install --yes \
19+
odbc-postgresql unixodbc-dev
20+
21+
22+
# Apache Arrow and simdutf.
23+
FROM odbc AS arrow-simdutf
24+
25+
# Install Apache Arrow.
26+
# https://arrow.apache.org/install/
27+
# Use `apt-get` (stable scripting interface) and refresh the index in the same layer as the install.
RUN apt-get update && apt-get install --yes -V ca-certificates lsb-release wget
28+
RUN wget https://repo1.maven.org/maven2/org/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
29+
# Use `apt-get` (stable scripting interface) to install the downloaded repository package.
RUN apt-get install --yes -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
30+
RUN apt-get update
31+
RUN apt-get install --yes \
32+
libarrow-dev libarrow-dataset-dev libarrow-acero-dev libarrow-flight-dev \
33+
libarrow-flight-sql-dev libgandiva-dev libparquet-dev
34+
35+
# Install ultra-fast Unicode routines.
36+
RUN true \
37+
&& git clone https://github.com/simdutf/simdutf --branch=v6.4.0 \
38+
&& cd simdutf \
39+
&& cmake -B build -DCMAKE_CXX_FLAGS=-Werror -DBUILD_SHARED_LIBS=ON \
40+
&& cmake --build build \
41+
&& cmake --install build
42+
43+
# Load shared library.
44+
# ldconfig expects directories, not library files, as positional arguments.
RUN ldconfig /usr/local/lib
45+
46+
47+
# Set up turbodbc.
48+
FROM arrow-simdutf AS turbodbc
49+
50+
# Configure Python environment.
51+
ENV PIP_ROOT_USER_ACTION=ignore
52+
ENV PIP_BREAK_SYSTEM_PACKAGES=true
53+
ENV UV_BREAK_SYSTEM_PACKAGES=true
54+
ENV UV_COMPILE_BYTECODE=true
55+
ENV UV_LINK_MODE=copy
56+
ENV UV_PYTHON_DOWNLOADS=never
57+
ENV UV_SYSTEM_PYTHON=true
858

959
# Install NumPy, PyArrow, and turbodbc.
1060
# Plain local files need no ADD semantics; a wildcard source wants a directory destination.
COPY requirements*.txt ./
11-
RUN pip install --upgrade --requirement=requirements-prereq.txt
12-
RUN MAKEFLAGS="-j$(nproc)" pip install --upgrade --requirement=requirements.txt --verbose
61+
RUN \
62+
--mount=type=cache,id=user-cache,target=/root/.cache \
63+
true \
64+
&& pip install uv \
65+
&& uv pip install --upgrade --requirement=requirements-prereq.txt --verbose \
66+
# Use half of the CPUs, rounded up so the job count is never zero. POSIX shell
# arithmetic replaces `bc -e`: `-e` is not a GNU bc option, and `export` masks that failure.
&& export WORKERS=$(( ($(nproc) + 1) / 2 )) \
67+
&& MAKEFLAGS="-j${WORKERS}" uv pip install --upgrade --requirement=requirements.txt --verbose
68+
69+
70+
FROM turbodbc
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Set up openSUSE Linux with ODBC.
2+
FROM registry.opensuse.org/opensuse/tumbleweed:20250329 AS build
3+
#FROM registry.suse.com/suse/sle15 AS build
4+
5+
# Activate package repositories.
6+
RUN zypper --gpg-auto-import-keys refresh
7+
8+
# Install Python and development libraries.
9+
RUN zypper install -y \
10+
bc cmake git gcc-c++ \
11+
python313-devel python313-pip python313-pybind11 \
12+
update-alternatives uv
13+
14+
# Make recent Python version the default.
15+
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.13 0
16+
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 0
17+
RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3.13 0
18+
RUN update-alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.13 0
19+
20+
21+
# Set up ODBC.
22+
FROM build AS odbc
23+
24+
# Install unixODBC and the PostgreSQL ODBC driver.
25+
RUN zypper install -y \
26+
psqlODBC unixODBC-devel
27+
28+
29+
# Apache Arrow and simdutf.
30+
FROM odbc AS arrow-simdutf
31+
32+
# Provide Apache Arrow by activating the "science" package repository.
33+
# https://software.opensuse.org/download/package?package=apache-arrow&project=science
34+
RUN zypper addrepo https://download.opensuse.org/repositories/science/openSUSE_Tumbleweed/science.repo
35+
RUN zypper --gpg-auto-import-keys refresh
36+
37+
# Install Apache Arrow.
38+
# https://arrow.apache.org/install/
39+
RUN zypper install -y \
40+
apache-arrow
41+
42+
# Install ultra-fast Unicode routines.
43+
RUN true \
44+
&& git clone https://github.com/simdutf/simdutf --branch=v6.4.0 \
45+
&& cd simdutf \
46+
&& cmake -B build -DCMAKE_CXX_FLAGS=-Werror -DBUILD_SHARED_LIBS=ON \
47+
&& cmake --build build \
48+
&& cmake --install build
49+
50+
# Load shared library.
51+
# ldconfig expects directories, not library files, as positional arguments.
RUN ldconfig /usr/local/lib64
52+
53+
54+
# Set up turbodbc.
55+
FROM arrow-simdutf AS turbodbc
56+
57+
# Configure Python environment.
58+
ENV PIP_ROOT_USER_ACTION=ignore
59+
ENV PIP_BREAK_SYSTEM_PACKAGES=true
60+
ENV UV_BREAK_SYSTEM_PACKAGES=true
61+
ENV UV_COMPILE_BYTECODE=true
62+
ENV UV_LINK_MODE=copy
63+
ENV UV_PYTHON_DOWNLOADS=never
64+
ENV UV_SYSTEM_PYTHON=true
65+
66+
# Install NumPy, PyArrow, and turbodbc.
67+
# Plain local files need no ADD semantics; a wildcard source wants a directory destination.
COPY requirements*.txt ./
68+
RUN \
69+
--mount=type=cache,id=user-cache,target=/root/.cache \
70+
true \
71+
&& uv pip install --upgrade --requirement=requirements-prereq.txt --verbose \
72+
# Use half of the CPUs, rounded up so the job count is never zero. POSIX shell
# arithmetic replaces `bc -e`: `-e` is not a GNU bc option, and `export` masks that failure.
&& export WORKERS=$(( ($(nproc) + 1) / 2 )) \
73+
&& MAKEFLAGS="-j${WORKERS}" uv pip install --upgrade --requirement=requirements.txt --verbose

by-language/python-turbodbc/dockerfiles/sles.Dockerfile

Lines changed: 0 additions & 24 deletions
This file was deleted.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Turbodbc wants NumPy and PyArrow to be installed upfront.
2-
numpy<1.25
3-
pyarrow<11
2+
numpy>=2
3+
pyarrow<20
44
wheel

0 commit comments

Comments
 (0)