Skip to content

Commit 58b897b

Browse files
alambCopilot
andauthored
Move arrow-pyarrow tests that require pyarrow to be installed into arrow-pyarrow-testing crate (#7742)
# Which issue does this PR close? - Related to #7394 - Closes #7736 # Rationale for this change At its core, if someone isn't using / modifying the pyarrow integration for arrow-rs they shouldn't have to install / configure python to get the tests working in `arrow-rs` - after the change in #7694 Running `cargo test --workspace` now also runs tests that require python to be setup and the `pyarrow` module to be installed. This is problematic because: 1. Some people may not have that environment setup 2. Apparently you can not use virtualenvs with py03 in Mac due to PyO3/pyo3#1741 The second item was very confusing for me while I tried to debug what going on as I ket getting an error about pyarrow not being installed, even though it was installed in my `venv`: ``` thread 'test_to_pyarrow' panicked at arrow-pyarrow/tests/pyarrow.rs:43:6: called `Result::unwrap()` on an `Err` value: PyErr { type: <class 'ModuleNotFoundError'>, value: ModuleNotFoundError("No module named 'pyarrow'"), traceback: None } ``` # What changes are included in this PR? 1. Move the tests that require pyarrow to be installed into `arrow-pyarrow-testing`, which is not part of the workspace and thus not run with `cargo test --all` 2. Remove `cargo test --exclude arrow-pyarrow` 3. Add documentation on rationale and hints about running the test # Frequently Asked Questions ## Why not add ` --exclude arrow-pyarrow` to `verify_release_candidate.sh`? While the minimal fix would be to add ` --exclude arrow-pyarrow` to verify_release_candidate.sh this requires all users of arrow to remember to add `--exclude arrow-pyarrow` to their tests even if they don't care about python ## Why not in `pyarrow-arrow-integration-testing` ? I did not put this test in `pyarrow-arrow-integration-testing` because that module doesn't compile for me with the stock python install Somehow python needs to be installed with the ability to make dynamic libraries that I haven't figured out and don't really want to. It seems maybe related to https://pyo3.rs/v0.18.1/getting_started#python (thanks to @Xuanwo for the pointer in PyO3/pyo3#2136 / apache/opendal#1675) ``` (venv) root@5e8d0406fabe:/arrow-rs/arrow-pyarrow-integration-testing# cargo test --test pyarrow warning: `/arrow-rs/arrow-pyarrow-integration-testing/.cargo/config` is deprecated in favor of `config.toml` note: if you need to support cargo 1.38 or earlier, you can symlink `config` to `config.toml` Compiling target-lexicon v0.13.2 Compiling flatbuffers v25.2.10 Compiling pyo3-build-config v0.24.2 Compiling arrow-ipc v55.2.0 (/arrow-rs/arrow-ipc) Compiling pyo3-macros-backend v0.24.2 Compiling pyo3-ffi v0.24.2 Compiling pyo3 v0.24.2 Compiling pyo3-macros v0.24.2 Compiling arrow-pyarrow v55.2.0 (/arrow-rs/arrow-pyarrow) Compiling arrow v55.2.0 (/arrow-rs/arrow) Compiling arrow-pyarrow-integration-testing v0.1.0 (/arrow-rs/arrow-pyarrow-integration-testing) error: linking with `cc` failed: exit status: 1 | = note: "cc" "/tmp/rustc0jx15I/symbols.o" "<43 object files omitted>" "-Wl,--as-needed" "-Wl,-Bstatic" "<sysroot>/lib/rustlib/aarch64-unknown-linux-gnu/lib/{libtest-*,libgetopts-*,libunicode_width-*,librustc_std_workspace_std-*}.rlib" "/arrow-rs/arrow-pyarrow-integration-testing/target/debug/deps/{libarrow-7996898a6777f964.rlib,libarrow_row-63508de6e52f4d4d.rlib,libarrow_pyarrow-8b510eeadc952ad2.rlib,libpyo3-c463c3a2243eeab9.rlib,libmemoffset-836dc1ddd866c614.rlib,libpyo3_ffi-fbf18d9f712874be.rlib,libunindent-2b8a456e13fa9700.rlib,libarrow_json-a7b4960a4b4d1cb5.rlib,libsimdutf8-7e080cbee40e41cd.rlib,libserde_json-0288fe0f1ec1bcde.rlib,libindexmap-ebb707a4eec26692.rlib,libequivalent-4762261bc2781d11.rlib,libarrow_ipc-085ebaaded386ff8.rlib,libflatbuffers-1f88fdf138129305.rlib,libarrow_csv-e5d679eef2b85a1b.rlib,libcsv-2288f6dec5308d9c.rlib,libitoa-fa5c9b2503c605f5.rlib,libserde-33ccdec93d601cce.rlib,libcsv_core-122def45831e6a2c.rlib,libarrow_string-17ebd7a5409511da.rlib,libregex-97f4021e65bafbca.rlib,libregex_automata-b62a0db5ace54d45.rlib,libaho_corasick-547ec01718db652c.rlib,libregex_syntax-f3065c7bb7c4592a.rlib,libmemchr-547fa7a4048cbc2e.rlib,libarrow_cast-0b7117723b343c65.rlib,libatoi-c9a52adfe9dd2564.rlib,libryu-243c2c0ae3ed75b4.rlib,libbase64-1cab23258b68443b.rlib,liblexical_core-c2a41d0a6941285f.rlib,liblexical_write_float-9d65854ce5ab8f07.rlib,liblexical_write_integer-894216b914487c18.rlib,liblexical_parse_float-274078b1af50d567.rlib,liblexical_parse_integer-781bcb0a42285559.rlib,liblexical_util-4a71e416d58e0125.rlib,libstatic_assertions-4f12831487497211.rlib,libarrow_arith-00bcbf2ec2eb3322.rlib,libarrow_ord-fcdece9f7e87a9bf.rlib,libarrow_select-31b4c3cfa277427b.rlib,libarrow_array-c2dc23f827508dc6.rlib,libahash-d573f36c088b3179.rlib,libgetrandom-31b11224f8e2ea08.rlib,liblibc-045cef5bc264baa9.rlib,libonce_cell-83f4df333969eacb.rlib,libzerocopy-0db6330db7e4b762.rlib,libhashbrown-2c7527cd2fd4322d.rlib,libchrono-6d1bc7062186f166.rlib,libiana_time_zone-875b10f893e8f81d.rlib,libarrow_data-be090acb3cb83adb.rlib,libarrow_schema-25469a5878e8c886.rlib,libbitflags-2614952e3652d907.rlib,libarrow_buffer-8eb56dc26cbe25a3.rlib,libbytes-8b0f150f04d16150.rlib,libhalf-ed72603b54882276.rlib,libcfg_if-9dbfdc9eaf8f6a2d.rlib,libnum-436acb7880d5b290.rlib,libnum_iter-87f263003ea3e8dd.rlib,libnum_rational-d812b535c653cc6e.rlib,libnum_complex-c12c249f79450951.rlib,libnum_bigint-ff983ebd6646ce72.rlib,libnum_integer-f946a0e48063a631.rlib,libnum_traits-d0a5f363c632fb69.rlib}.rlib" "<sysroot>/lib/rustlib/aarch64-unknown-linux-gnu/lib/{libstd-*,libpanic_unwind-*,libobject-*,libmemchr-*,libaddr2line-*,libgimli-*,librustc_demangle-*,libstd_detect-*,libhashbrown-*,librustc_std_workspace_alloc-*,libminiz_oxide-*,libadler2-*,libunwind-*,libcfg_if-*,liblibc-*,liballoc-*,librustc_std_workspace_core-*,libcore-*,libcompiler_builtins-*}.rlib" "-Wl,-Bdynamic" "-lgcc_s" "-lutil" "-lrt" "-lpthread" "-lm" "-ldl" "-lc" "-L" "/tmp/rustc0jx15I/raw-dylibs" "-Wl,--eh-frame-hdr" "-Wl,-z,noexecstack" "-L" "<sysroot>/lib/rustlib/aarch64-unknown-linux-gnu/lib" "-o" "/arrow-rs/arrow-pyarrow-integration-testing/target/debug/deps/pyarrow-00909cd9e7866a35" "-Wl,--gc-sections" "-pie" "-Wl,-z,relro,-z,now" "-nodefaultlibs" = note: some arguments are omitted. use `--verbose` to show all linker arguments = note: /usr/bin/ld: /arrow-rs/arrow-pyarrow-integration-testing/target/debug/deps/libarrow_pyarrow-8b510eeadc952ad2.rlib(arrow_pyarrow-8b510eeadc952ad2.8xxa5xo5oql7wlj24034o033n.rcgu.o): in function `<pyo3::instance::Borrowed<pyo3::types::tuple::PyTuple> as pyo3::call::PyCallArgs>::call_positional': /root/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/pyo3-0.24.2/src/call.rs:213: undefined reference to `PyObject_Call' ``` # Are there any user-facing changes? If there are user-facing changes then we may require documentation to be updated before approving the PR. If there are any breaking changes to public APIs, please call them out. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent df837a4 commit 58b897b

File tree

6 files changed

+101
-5
lines changed

6 files changed

+101
-5
lines changed

.github/workflows/integration.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,9 @@ jobs:
165165
- name: Run Rust tests
166166
run: |
167167
source venv/bin/activate
168-
cargo test -p arrow-pyarrow
169-
- name: Run tests
168+
cd arrow-pyarrow-testing
169+
cargo test
170+
- name: Run Python tests
170171
run: |
171172
source venv/bin/activate
172173
cd arrow-pyarrow-integration-testing

.github/workflows/rust.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
# do not produce debug symbols to keep memory usage down
5353
export RUSTFLAGS="-C debuginfo=0"
5454
# PyArrow tests happen in integration.yml.
55-
cargo test --workspace --exclude arrow-pyarrow
55+
cargo test --workspace
5656
5757
5858
# Check workspace wide compile and test with default features for
@@ -84,8 +84,7 @@ jobs:
8484
# do not produce debug symbols to keep memory usage down
8585
export RUSTFLAGS="-C debuginfo=0"
8686
export PATH=$PATH:/d/protoc/bin
87-
# PyArrow tests happen in integration.yml.
88-
cargo test --workspace --exclude arrow-pyarrow
87+
cargo test --workspace
8988
9089
9190
# Run cargo fmt for all crates

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ members = [
5555
resolver = "2"
5656

5757
exclude = [
58+
# arrow-pyarrow-testing is excluded because it requires a Python interpreter with the pyarrow package installed,
59+
# which makes running `cargo test --all` fail if the appropriate Python environment is not set up.
60+
"arrow-pyarrow-testing",
5861
# arrow-pyarrow-integration-testing is excluded because it requires different compilation flags, thereby
5962
# significantly changing how it is compiled within the workspace, causing the whole workspace to be compiled from
6063
# scratch this way, this is a stand-alone package that compiles independently of the others.

arrow-pyarrow-testing/Cargo.toml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Note this package is not published to crates.io, it is only used for testing
19+
# the arrow-pyarrow crate in the arrow-rs repository.
20+
#
21+
# It is not part of the workspace so that `cargo test --all` does not require
22+
# a Python interpreter or the pyarrow package to be installed.
23+
#
24+
# It is used to run tests that require a Python interpreter and the pyarrow
25+
# package installed. It is not intended to be used as a library or a standalone
26+
# application.
27+
#
28+
# It is different from `arrow-pyarrow-integration-testing` in that it works
29+
# with a standard pyarrow installation, rather than building a dynamic library
30+
# that can be loaded by Python (which requires additional configuraton of the
31+
# Python environment).
32+
33+
[package]
34+
name = "arrow-pyarrow-testing"
35+
description = "Tests for arrow-pyarrow that require only a Python interpreter and pyarrow installed"
36+
version = "0.1.0"
37+
homepage = "https://github.com/apache/arrow-rs"
38+
repository = "https://github.com/apache/arrow-rs"
39+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
40+
license = "Apache-2.0"
41+
keywords = [ "arrow" ]
42+
edition = "2021"
43+
rust-version = "1.81"
44+
publish = false
45+
46+
47+
[dependencies]
48+
# Note no dependency on arrow, to ensure arrow-pyarrow can be used by itself
49+
arrow-array = { path = "../arrow-array" }
50+
arrow-pyarrow = { path = "../arrow-pyarrow" }
51+
pyo3 = { version = "0.25", default-features = false }

arrow-pyarrow-testing/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! This crate exists to provide a test environment for the `arrow-pyarrow` crate.
19+
//! It is not intended to be used by itself. See comments in Cargo.toml for more
20+
//! details.

arrow-pyarrow/tests/pyarrow.rs renamed to arrow-pyarrow-testing/tests/pyarrow.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,28 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Tests pyarrow bindings
19+
//!
20+
//! This test requires installing the `pyarrow` python package. If you do not
21+
//! have this package installed, you will see an error such as the following:
22+
//!
23+
//! ```text
24+
//! PyErr { type: <class 'ModuleNotFoundError'>, value: ModuleNotFoundError("No module named 'pyarrow'"), traceback: None }
25+
//! ```
26+
//!
27+
//! # Notes
28+
//!
29+
//! You can not use a virtual environment to run these tests on MacOS, as it will
30+
//! fail to find the pyarrow module due to <https://github.com/PyO3/pyo3/issues/1741>
31+
//!
32+
//! One way to run them is to install the `pyarrow` package in the system Python,
33+
//! which might break other packages, so use with caution:
34+
//!
35+
//! ```shell
36+
//! brew install pipx
37+
//! pip3 install --break-system-packages pyarrow
38+
//! ```
39+
1840
use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder};
1941
use arrow_array::{
2042
Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray, StringViewArray,

0 commit comments

Comments
 (0)