diff --git a/.gitignore b/.gitignore
index b33e085fdf..9e075536c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 # pkg and bin directories currently contain build artifacts
 # only so we exclude them.
 bin/
+!pkg/data_cache/src/head/bin/
+!pkg/data_cache/src/worker/bin/
 manifests/external-crds/
 
 # IDEs
@@ -20,9 +22,17 @@ artifacts
 __pycache__/
 *.egg-info/
 
+# Environment variables
+.env
+
 # Coverage
 cover.out
 
 # Helm
 charts/kubeflow-trainer/charts/
 charts/kubeflow-trainer/Chart.lock
+
+# data_cache
+pkg/data_cache/target
+pkg/data_cache/src/client/target/
+hack/data_cache/*.log
diff --git a/cmd/data_cache/Dockerfile b/cmd/data_cache/Dockerfile
new file mode 100644
index 0000000000..7229371e07
--- /dev/null
+++ b/cmd/data_cache/Dockerfile
@@ -0,0 +1,50 @@
+FROM rust:1.85-bullseye AS builder
+
+WORKDIR /workspace
+
+ENV RUST_LOG=info
+
+# Install system dependencies
+RUN apt-get update && \
+    apt-get -y install libssl-dev openssl zlib1g zlib1g-dev libpq-dev cmake protobuf-compiler netcat curl && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install cargo-chef for better caching
+RUN rustup update && cargo install cargo-chef --version 0.1.62
+
+# Copy manifests for dependency caching
+COPY pkg/data_cache/Cargo.toml pkg/data_cache/Cargo.lock ./
+
+# Cache dependencies
+RUN cargo fetch
+
+# Copy source code
+COPY pkg/data_cache/ .
+
+# Run tests
+RUN cargo test --tests
+
+# Build binaries in release mode
+RUN cargo build --release --bin head --bin worker
+
+# Stage 2: Create a minimal runtime image
+FROM debian:bookworm-slim AS runtime
+
+# Install runtime dependencies
+RUN apt-get update && \
+    apt-get -y install ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy binaries from builder stage
+COPY --from=builder /workspace/target/release/head /usr/local/bin/head
+COPY --from=builder /workspace/target/release/worker /usr/local/bin/worker
+
+# Create non-root user
+RUN groupadd -r cache_user && useradd -r -g cache_user cache_user
+
+# Change ownership and switch to non-root user
+RUN chown -R cache_user:cache_user /usr/local/bin/
+USER cache_user
+
+# Set default command
+CMD ["head"]
diff --git a/hack/data_cache/run_with_remote_table.sh b/hack/data_cache/run_with_remote_table.sh
new file mode 100755
index 0000000000..a3acca0877
--- /dev/null
+++ b/hack/data_cache/run_with_remote_table.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# Starts two worker nodes and one head node locally, using temporary AWS
+# credentials obtained by assuming the given IAM role.
+
+# Check for required arguments
+if [ $# -lt 5 ]; then
+    echo "Usage: $0 <iam_role_arn> <metadata_loc> <table_name> <schema_name> <aws_profile> [environment]"
+    exit 1
+fi
+
+IAM_ROLE_ARN="$1"
+METADATA_LOC="$2"
+TABLE_NAME="$3"
+SCHEMA_NAME="$4"
+AWS_PROFILE="$5"
+ENVIRONMENT="${6:-LOCAL}"
+
+echo "Using IAM Role: $IAM_ROLE_ARN"
+echo "Metadata Location: $METADATA_LOC"
+echo "Table Name: $TABLE_NAME"
+echo "Schema Name: $SCHEMA_NAME"
+echo "Environment: $ENVIRONMENT"
+echo "AWS Profile: $AWS_PROFILE"
+
+# Abort early if the role cannot be assumed; otherwise the jq calls below
+# would run on empty input and export unusable credential values.
+if ! role_output=$(aws sts assume-role --role-arn "$IAM_ROLE_ARN" --role-session-name "RoleSession1" --profile "$AWS_PROFILE"); then
+    echo "Failed to assume IAM role: $IAM_ROLE_ARN" >&2
+    exit 1
+fi
+
+# Parse the JSON output using jq (quoted so the JSON is not word-split or globbed)
+export AWS_ACCESS_KEY_ID=$(echo "$role_output" | jq -r '.Credentials.AccessKeyId')
+export AWS_SECRET_ACCESS_KEY=$(echo "$role_output" | jq -r '.Credentials.SecretAccessKey')
+export AWS_SESSION_TOKEN=$(echo "$role_output" | jq -r '.Credentials.SessionToken')
+export AWS_REGION=us-west-2
+export AWS_EC2_METADATA_DISABLED=true
+
+# Set required environment variables for testing
+export METADATA_LOC="$METADATA_LOC"
+export TABLE_NAME="$TABLE_NAME"
+export SCHEMA_NAME="$SCHEMA_NAME"
+export RUNTIME_ENV="$ENVIRONMENT"
+
+# Stop every service started so far and remove the temporary metadata directory
+stop_services() {
+    echo ""
+    echo "Stopping services..."
+    kill -9 $WORKER1_PID $WORKER2_PID $HEAD_PID 2>/dev/null || true
+    wait $WORKER1_PID $WORKER2_PID $HEAD_PID 2>/dev/null || true
+    rm -rf /tmp/test_metadata
+}
+
+# Function to cleanup processes on exit
+cleanup() {
+    stop_services
+    exit 0
+}
+
+# Set up signal handlers so Ctrl+C / SIGTERM stops all services
+trap cleanup SIGINT SIGTERM
+
+# Function to check if a service is ready.
+# Args: host, port, service name and, optionally, the PID of the service.
+# When a PID is given, a service that died during startup is reported and the
+# script exits with status 1 instead of polling forever.
+check_service_ready() {
+    local host=$1
+    local port=$2
+    local service_name=$3
+    local pid=${4:-}
+
+    echo "Waiting for $service_name to be ready on $host:$port..."
+    while ! nc -z "$host" "$port" 2>/dev/null; do
+        if [ -n "$pid" ] && ! kill -0 "$pid" 2>/dev/null; then
+            echo " $service_name exited before becoming ready; check its log file." >&2
+            stop_services
+            exit 1
+        fi
+        echo " $service_name not ready yet, waiting 2 seconds..."
+        sleep 2
+    done
+    echo " $service_name is ready!"
+}
+
+echo "Starting worker node 1..."
+cargo run --bin worker -- 0.0.0.0 50052 > worker1.log 2>&1 &
+WORKER1_PID=$!
+
+echo "Starting worker node 2..."
+cargo run --bin worker -- 0.0.0.0 50053 > worker2.log 2>&1 &
+WORKER2_PID=$!
+
+# Wait for both workers to be ready
+check_service_ready localhost 50052 "worker1" "$WORKER1_PID"
+check_service_ready localhost 50053 "worker2" "$WORKER2_PID"
+
+echo "Both workers are ready, starting head node..."
+cargo run --bin head -- 0.0.0.0 50051 > head.log 2>&1 &
+HEAD_PID=$!
+
+check_service_ready localhost 50051 "head" "$HEAD_PID"
+
+echo "All services are running. Press Ctrl+C to stop all services."
+wait
+
+#echo "Running client test..."
+#cd src/client && cargo run 2>&1
+#CLIENT_EXIT_CODE=$?
diff --git a/pkg/data_cache/Cargo.lock b/pkg/data_cache/Cargo.lock
new file mode 100644
index 0000000000..042cc6c0d4
--- /dev/null
+++ b/pkg/data_cache/Cargo.lock
@@ -0,0 +1,5541 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.2.15", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "apache-avro" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" +dependencies = [ + "bigdecimal", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand 0.8.5", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum 0.26.3", + "strum_macros 0.26.4", + "thiserror 1.0.69", + "typed-builder 0.19.1", + "uuid", +] + +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.2", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "regex", +] + +[[package]] +name = 
"arrow-data" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-flight" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91efc67a4f5a438833dd76ef674745c80f6f6b9a428a3b440cbfbf74e32867e6" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-schema", + "base64", + "bytes", + "futures", + "prost", + "prost-types", + "tonic", +] + +[[package]] +name = "arrow-ipc" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.9.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + 
+[[package]] +name = "arrow-schema" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" + +[[package]] +name = "arrow-select" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "arrow_cache" +version = "0.1.0" +dependencies = [ + "arrow", + "arrow-flight", + "arrow-schema", + "async-trait", + "bincode", + "bytes", + "datafusion", + "fastrand", + "futures", + "iceberg", + "iceberg-datafusion", + "object_store", + "serde", + "serde_json", + "tokio", + "tonic", + "tracing", + "tracing-subscriber", + "trust-dns-resolver", + "url", +] + +[[package]] +name = "as-any" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" + +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "async-trait" +version = "0.1.88" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + 
"percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backon" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bigdecimal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "borsh" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2506947f73ad44e344215ccd6403ac2ae18cd8e046e581a441bf8d199f257f03" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2593a3b8b938bd68373196c9832f516be11fa487ef4ae745eb282e6a56a7244" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "brotli" +version = "8.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bytemuck" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + 
+[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +dependencies = [ + "parse-zoneinfo", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +dependencies = [ + "strum 0.26.3", + 
"strum_macros 0.26.4", + "unicode-width", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + +[[package]] +name = "cpufeatures" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = 
[ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.101", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "data-encoding" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010" + +[[package]] +name = "datafusion" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-macros", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.8.5", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + 
"datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ipc", + "base64", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parquet", + "rand 0.8.5", + "tempfile", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + 
"datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand 0.8.5", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" + +[[package]] +name = "datafusion-execution" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand 0.8.5", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap 2.9.0", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.9.0", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.8.5", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-doc", + 
"datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +dependencies = [ + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = 
"datafusion-functions-window-common" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "datafusion-optimizer" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "datafusion-physical-expr" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "47.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap 2.9.0", + "log", + "recursive", + "regex", + "sqlparser", +] + 
+[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.101", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "dissimilar" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" + +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "event-listener" +version = "5.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" +dependencies = [ + "event-listener", + "pin-project-lite", +] + +[[package]] +name = "expect-test" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63af43ff4431e848fb47472a920f14fa71c24de13255a5692e93d4e90302acb0" +dependencies = [ + "dissimilar", + "once_cell", +] + +[[package]] +name = "fastrand" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +dependencies = [ + "crc32fast", + "libz-rs-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = 
[ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generator" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bd114ceda131d3b1d665eba35788690ad37f5916457286b32ab6fd3c438dd" 
+dependencies = [ + "cfg-if", + "libc", + "log", + "rustversion", + "windows", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "h2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.9.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + 
+[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 
0.52.0", +] + +[[package]] +name = "hostname" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + "libc", + "match_cfg", + "winapi", +] + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + 
"pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2 0.5.8", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core 0.52.0", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "iceberg" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "306fd4bf70d30687dc765110ecd19fc2bb21f16c3d5c188bc53a0d573bb6e675" +dependencies = [ + "anyhow", + 
"apache-avro", + "array-init", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ord", + "arrow-schema", + "arrow-select", + "arrow-string", + "as-any", + "async-trait", + "backon", + "base64", + "bimap", + "bytes", + "chrono", + "derive_builder", + "expect-test", + "fnv", + "futures", + "itertools 0.13.0", + "moka", + "murmur3", + "num-bigint", + "once_cell", + "opendal", + "ordered-float 4.5.0", + "parquet", + "rand 0.8.5", + "reqwest", + "roaring", + "rust_decimal", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "serde_repr", + "serde_with", + "strum 0.27.2", + "thrift", + "tokio", + "typed-builder 0.20.0", + "url", + "uuid", + "zstd", +] + +[[package]] +name = "iceberg-datafusion" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad75c713bc381ef0c6f7d592c3d4493ddf404bd98e82d87f9d775d3be2387216" +dependencies = [ + "anyhow", + "async-trait", + "datafusion", + "futures", + "iceberg", + "tokio", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "ident_case" +version = 
"1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown 0.15.2", + "serde", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "io-uring" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "ipconfig" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2 0.5.8", + "widestring", + "windows-sys 0.48.0", + "winreg", +] + +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + 
"lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] 
+ +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "libz-rs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +dependencies = [ + "zlib-rs", +] + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + 
"scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash 1.6.3", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", +] + +[[package]] +name = "moka" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +dependencies = [ + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener", + "futures-util", + "loom", + "parking_lot", + "portable-atomic", + "rustc_version", + "smallvec", + "tagptr", + "thiserror 1.0.69", + "uuid", +] + +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d94ac16b433c0ccf75326388c893d2835ab7457ea35ab8ba5d745c053ef5fa16" +dependencies = [ + "async-trait", + "base64", + "bytes", + "chrono", + "form_urlencoded", + "futures", + "http", + "http-body-util", + "humantime", + "hyper", + "itertools 0.14.0", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand 0.9.2", + "reqwest", + "ring", + "serde", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.3", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "opendal" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a" +dependencies = [ + "anyhow", + "backon", + "base64", + "bytes", + "chrono", + "crc32c", + "futures", + "getrandom 0.2.15", + "http", + "http-body", + "log", + "md-5", + "percent-encoding", + "quick-xml", + "reqsign", + "reqwest", + "serde", + "serde_json", + "tokio", + "uuid", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.5", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "parquet" +version = "55.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.2", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash 2.1.0", + "zstd", +] + +[[package]] +name = 
"parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.9.0", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + +[[package]] +name = "psm" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" +dependencies = [ + "cc", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quad-rand" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" + +[[package]] +name = "quick-error" +version = "1.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2 0.5.8", + "thiserror 2.0.3", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +dependencies = [ + "bytes", + "getrandom 0.2.15", + "rand 0.8.5", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.3", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.5.8", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.1", +] + +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.101", +] + 
+[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqsign" +version = "0.16.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "chrono", + "form_urlencoded", + "getrandom 0.2.15", + "hex", + "hmac", + "home", + "http", + "log", + "percent-encoding", + "quick-xml", + "rand 0.8.5", + "reqwest", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", + "tokio", +] + +[[package]] +name = "reqwest" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-native-certs", + "rustls-pemfile", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower 0.5.2", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots", + "windows-registry", +] + +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + +[[package]] +name = "roaring" +version = "0.10.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +dependencies = [ + "bytemuck", + "byteorder", +] + +[[package]] +name = "rust-ini" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e310ef0e1b6eeb79169a1171daf9abcb87a2e17c03bee2c4bb100b55c75409f" +dependencies = [ + "cfg-if", + "ordered-multimap", + "trim-in-place", +] + +[[package]] +name = "rust_decimal" +version = "1.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faa7de2ba56ac291bd90c6b9bece784a52ae1411f9506544b3eae36dd2356d50" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" + +[[package]] +name = "rustc_version" 
+version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.23.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "security-framework" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.23" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "3.11.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28bdad6db2b8340e449f7108f020b3b092e8583a9e3fb82713e1d4e71fe817" +dependencies = [ + "base64", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.9.0", + "serde", + "serde_derive", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d846214a9854ef724f3da161b426242d8de7c1fc7de2f89bb1efcb154dca79d" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] 
+name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "stacker" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.101", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "subtle" +version = 
"2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float 2.10.1", + "threadpool", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + 
"time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "slab", + "socket2 0.6.0", + "tokio-macros", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap 2.9.0", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2 0.5.8", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + 
"tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] 
+name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "trim-in-place" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" + +[[package]] +name = "trust-dns-proto" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3119112651c157f4488931a01e586aa459736e9d6046d3bd9105ffb69352d374" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand 0.8.5", + "smallvec", + "thiserror 1.0.69", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "trust-dns-resolver" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a3e6c3aff1718b3c73e395d1f35202ba2ffa847c6a62eea0db8fb4cfe30be6" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot", + "rand 0.8.5", + "resolv-conf", + "smallvec", + "thiserror 1.0.69", + "tokio", + "tracing", + "trust-dns-proto", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" 
+version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "twox-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" + +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro 0.19.1", +] + +[[package]] +name = "typed-builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" +dependencies = [ + "typed-builder-macro 0.20.0", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "typed-builder-macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna 1.0.3", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +dependencies = [ + "getrandom 0.3.1", + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn 2.0.101", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "widestring" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result 0.2.0", + "windows-strings 0.1.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" +dependencies = [ + "windows-result 0.3.2", + "windows-strings 0.3.1", + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + 
"windows-result 0.2.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + 
+[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "zlib-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = 
"zstd-sys" +version = "2.0.12+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/pkg/data_cache/Cargo.toml b/pkg/data_cache/Cargo.toml new file mode 100644 index 0000000000..02affb4c22 --- /dev/null +++ b/pkg/data_cache/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "arrow_cache" +version = "0.1.0" +edition = "2024" + +[dependencies] +tracing = "0.1.41" +tracing-subscriber = "0.3" +tokio = "1.41.1" +datafusion = "47.0.0" +arrow = "55.0.0" +arrow-schema = "55.0.0" +serde = { version = "1.0.188", features = ["derive"] } +serde_json = "1.0" +async-trait = "0.1.86" +iceberg-datafusion = "0.6.0" +iceberg = "0.6.0" +futures = "0.3.31" +arrow-flight = "55.0.0" +tonic = "0.12.3" +bytes = "1.9.0" +bincode = "1.3" +trust-dns-resolver = "*" +object_store = { version = "0.12.1", features = ["aws"] } +url = "2.5.4" +fastrand = "2.1.1" + +[dev-dependencies] +tokio = { version = "1.41.1", features = ["full"] } + +[[bin]] +name = "head" +path = "src/head/bin/main.rs" + +[[bin]] +name = "worker" +path = "src/worker/bin/main.rs" diff --git a/pkg/data_cache/OWNERS b/pkg/data_cache/OWNERS new file mode 100644 index 0000000000..4761918f0f --- /dev/null +++ b/pkg/data_cache/OWNERS @@ -0,0 +1,2 @@ +approvers: + - akshaychitneni diff --git a/pkg/data_cache/README.md b/pkg/data_cache/README.md new file mode 100644 index 0000000000..852b6877c5 --- /dev/null +++ b/pkg/data_cache/README.md @@ -0,0 +1,89 @@ +# Kubeflow Data Cache + +## Development Setup + +### Build the project +```bash +cargo build +``` + +### Build in release mode +```bash +cargo build --release +``` + +## Docker Build Instructions + +### Build the Docker image +```bash +docker build -f cmd/data_cache/Dockerfile -t arrow_cache . 
+``` + +### Run the head service +```bash +docker run -p 50051:50051 arrow_cache head +``` + +### Run the worker service +```bash +docker run -p 50052:50052 arrow_cache worker +``` + +## Running the System + +### Option 1: Remote Table Testing + +Run the system with remote table configuration using IAM roles: + +```bash +../../hack/data_cache/run_with_remote_table.sh <iam-role-arn> <metadata-loc> <table-name> <schema-name> <aws-profile> [environment] +``` + +**Example:** +```bash +../../hack/data_cache/run_with_remote_table.sh \ + arn:aws:iam::<account-id>:role/<role-name> \ + s3a://<metadata-path> \ + <table-name> \ + <schema-name> \ + <aws-profile> \ + LOCAL +``` + +**Parameters:** +- `iam-role-arn` (required): IAM role ARN for AWS access +- `metadata-loc` (required): S3 location of the metadata file +- `table-name` (required): Name of the table +- `schema-name` (required): Name of the schema +- `aws-profile` (required): AWS profile name +- `environment` (optional): Runtime environment (defaults to "LOCAL") + +This script will: +1. Assume the specified IAM role +2. Set up AWS credentials and environment variables +3. Start two worker nodes (ports 50052, 50053) +4. Start the head node (port 50051) +5. Wait for all services to be ready + +Press `Ctrl+C` to stop all services. + +## Testing + +### Run Client Test +```bash +cd src/client +cargo run --bin client -- --endpoint http://localhost:50051 --local-rank 2 --world-size 4 +``` + +## Environment Configuration + +The system supports two runtime environments: +- **Local Development**: Set `RUNTIME_ENV=LOCAL` to use localhost workers on ports 50052/50053 +- **Kubernetes/LWS**: Uses `LWS_LEADER_ADDRESS` and `LWS_GROUP_SIZE` for service discovery + +## Prerequisites + +- Rust and Cargo +- AWS CLI configured with appropriate credentials +- `jq` for JSON parsing +- `nc` (netcat) for service health checks diff --git a/pkg/data_cache/src/client/Cargo.lock b/pkg/data_cache/src/client/Cargo.lock new file mode 100644 index 0000000000..bf26e2f88d --- /dev/null +++ b/pkg/data_cache/src/client/Cargo.lock @@ -0,0 +1,1844 @@ +# This file is automatically @generated by Cargo.
+# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" 
+dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.15.2", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + 
"arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-flight" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cb3e1d2b441e6d1d5988e3f7c4523c9466b18ef77d7c525d92d36d4cad49fbe" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-schema", + "base64", + "bytes", + "futures", + "prost", + "prost-types", + "tonic", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" 
+dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 1.0.2", + "tower 0.5.1", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", +] + +[[package]] +name = "base64" +version 
= "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" + +[[package]] +name = "cc" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "clap" +version = "4.5.43" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "50fd97c9dc2399518aa331917ac6f274280ec5eb34e555dd291899745c48ec6f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35b5830294e1fa0462034af85cc95225a4cb07092c088c55bda3147cfcd8f65" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "client" +version = "0.1.0" +dependencies = [ + "arrow-flight", + "bincode", + "bytes", + "clap", + "futures", + "serde", + "tokio", + "tonic", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = 
"0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "h2" +version = "0.4.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.7.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" 
+dependencies = [ + "cc", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +dependencies = [ + "equivalent", + "hashbrown 0.15.2", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "js-sys" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = 
"lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.167" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" + +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "matchit" +version = 
"0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = 
"percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", 
+] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies 
= [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + 
+[[package]] +name = "wasm-bindgen-macro" +version = "0.2.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name 
= "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/pkg/data_cache/src/client/Cargo.toml b/pkg/data_cache/src/client/Cargo.toml new file mode 100644 index 0000000000..a97487c0e9 --- /dev/null +++ b/pkg/data_cache/src/client/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "client" +version = "0.1.0" +edition = "2024" + +[dependencies] +futures = "0.3.31" +arrow-flight = "55.0.0" +tokio = { version = "1.41.1", features = ["rt-multi-thread", "macros"] } +tonic = "0.12.3" +bytes = "1.9.0" +bincode = "1.3" +serde = { version = "1.0.215", features = ["derive"] } +tracing = "0.1.41" +tracing-subscriber = 
"0.3.19" +clap = { version = "4.0", features = ["derive", "env"] } + + +[[bin]] +name = "client" +path = "src/main.rs" diff --git a/pkg/data_cache/src/client/src/main.rs b/pkg/data_cache/src/client/src/main.rs new file mode 100644 index 0000000000..9e4f4f809e --- /dev/null +++ b/pkg/data_cache/src/client/src/main.rs @@ -0,0 +1,108 @@ +//! Sample client for validating access to cached datasets in the Arrow-based caching system. +//! +//! This client demonstrates how to connect to the head node and stream cached data +//! from worker nodes using Apache Arrow Flight protocol. It serves as a reference +//! implementation for accessing and consuming distributed cached datasets. + +use arrow_flight::decode::FlightRecordBatchStream; +use futures::stream::StreamExt; +use arrow_flight::flight_service_client::FlightServiceClient; +use tonic::transport::Channel; +use arrow_flight::FlightDescriptor; +use futures::TryStreamExt; +use serde::{Deserialize, Serialize}; +use tracing::{info, error}; +use tracing_subscriber; +use clap::Parser; + +/// Index pair structure for dataset partitioning +#[derive(Serialize, Deserialize, Debug)] +struct IndexPair { + start: u64, + end: u64, +} + +/// Command line arguments for the data cache client +#[derive(Parser, Debug)] +#[command(name = "data-cache-client")] +#[command(about = "Sample client for accessing cached datasets via Arrow Flight protocol")] +struct Args { + /// Head node endpoint URL + #[arg(long, env = "HEAD_NODE_ENDPOINT", default_value = "http://localhost:50051")] + endpoint: String, + + /// Local rank for this client instance + #[arg(long, env = "LOCAL_RANK", default_value = "1")] + local_rank: String, + + /// Total number of ranks in the distributed setup + #[arg(long, env = "WORLD_SIZE", default_value = "3")] + world_size: String, +} +/// Sample client main function that demonstrates accessing cached datasets. +/// +/// This function: +/// 1. Connects to the configurable head node endpoint +/// 2. 
Requests flight information for a specific rank/partition +/// 3. Streams data from worker nodes using Arrow Flight protocol +/// 4. Validates that cached data can be successfully accessed and consumed +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt().init(); + + let args = Args::parse(); + + let channel = Channel::from_shared(args.endpoint.clone())?.connect().await?; + let mut client = FlightServiceClient::new(channel); + + let descriptor = FlightDescriptor { + r#type: 1, + cmd: Default::default(), + path: vec![args.local_rank.clone(), args.world_size.clone()], + }; + + let response = client.get_flight_info(descriptor).await?; + let flight_info = response.into_inner(); + + for endpoint in flight_info.endpoint { + for location in endpoint.location { + let mut client = connect_to_host(&location.uri).await?; + let request = tonic::Request::new(endpoint.ticket.clone().ok_or("No ticket found in endpoint")?); + let response = client.do_get(request).await?.into_inner(); + + let mut record_batch_stream = FlightRecordBatchStream::new_from_flight_data( + response.map_err(|e| e.into()) + ); + + while let Some(batch) = record_batch_stream.next().await { + match batch { + Ok(record_batch) => { + info!("Read batch with {} rows", record_batch.num_rows()); + // println!("{:?}", record_batch); + }, + Err(e) => { + error!("error: {}", e) + } + } + } + } + } + + Ok(()) +} + +/// Connects to a worker node endpoint for streaming cached dataset data. +/// +/// This helper function establishes a connection to a worker node and returns +/// a Flight service client for data streaming operations. +/// +/// # Arguments +/// * `endpoint` - The worker node endpoint URI +/// +/// # Returns +/// * `Result, Box>` - Connected client or error +async fn connect_to_host(endpoint: &str) -> Result, Box> { + let address = format!("{}", endpoint); + info!("{}", address); + Ok(FlightServiceClient::connect(address).await?) 
+} diff --git a/pkg/data_cache/src/config/config.rs b/pkg/data_cache/src/config/config.rs new file mode 100644 index 0000000000..8b5041a2b4 --- /dev/null +++ b/pkg/data_cache/src/config/config.rs @@ -0,0 +1,78 @@ +use std::env; +use std::sync::Arc; +use std::time::Duration; + +/// Configuration for dataset metadata and table information. +/// +/// **Important**: The `schema_name` here refers to the **Iceberg schema namespace**, +/// not Arrow schemas. The distributed caching system uses two separate Arrow schemas: +/// +/// 1. **Metadata Schema**: Created by head node for worker coordination +/// 2. **Data Schema**: Converted from Iceberg schema by worker nodes +/// +/// This config provides the Iceberg table coordinates that workers use to +/// retrieve the original data schema and convert it to Arrow format. +#[derive(Debug, Clone)] +pub struct DatasetConfig { + /// Location of Iceberg table metadata (e.g., S3 path to metadata.json) + pub metadata_loc: String, + /// Iceberg schema namespace (NOT Arrow schema - used for table identification) + pub schema_name: String, + /// Iceberg table name within the schema namespace + pub table_name: String, +} + +/// Comprehensive configuration for the data cache system +/// Consolidates all environment variables used across the application +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct CacheConfig { + pub dataset: DatasetConfig, + pub connect_timeout: Duration, +} + +impl DatasetConfig { + pub fn from_env() -> Result> { + let metadata_loc = env::var("METADATA_LOC")?; + let schema_name = env::var("SCHEMA_NAME")?; + let table_name = env::var("TABLE_NAME")?; + Ok(DatasetConfig { + metadata_loc, + schema_name, + table_name, + }) + } +} + +#[allow(dead_code)] +impl CacheConfig { + pub fn from_env() -> Result> { + let dataset = DatasetConfig::from_env()?; + + // Connection timeout with default of 20 seconds + let connect_timeout = env::var("CONNECT_TIMEOUT_SECS") + .ok() + .and_then(|s| s.parse::().ok()) + 
.map(Duration::from_secs) + .unwrap_or(Duration::from_secs(20)); + + Ok(CacheConfig { + dataset, + connect_timeout, + }) + } + + /// Create shared configuration from environment variables + /// Returns Arc for efficient sharing across components + #[allow(dead_code)] + pub fn shared_from_env() -> Result, Box> { + Ok(Arc::new(Self::from_env()?)) + } + + /// Create a new configuration with custom timeout + #[allow(dead_code)] + pub fn with_timeout(mut self, timeout: Duration) -> Self { + self.connect_timeout = timeout; + self + } +} diff --git a/pkg/data_cache/src/config/mod.rs b/pkg/data_cache/src/config/mod.rs new file mode 100644 index 0000000000..ef68c36943 --- /dev/null +++ b/pkg/data_cache/src/config/mod.rs @@ -0,0 +1 @@ +pub mod config; diff --git a/pkg/data_cache/src/head/bin/main.rs b/pkg/data_cache/src/head/bin/main.rs new file mode 100644 index 0000000000..664e12f96a --- /dev/null +++ b/pkg/data_cache/src/head/bin/main.rs @@ -0,0 +1,144 @@ +use std::env; +use std::fs; +use std::time::Duration; +use tokio::time::sleep; +use tracing::{info, warn, error, debug}; +use tracing_subscriber; +use trust_dns_resolver::TokioAsyncResolver; +use trust_dns_resolver::config::*; +use serde::{Deserialize, Serialize}; +use serde_json; + +#[path = "../mod.rs"] +mod head; + +/// Worker configuration structure for parsing worker-mapping.json +#[derive(Debug, Clone, Serialize, Deserialize)] +struct WorkerConfig { + host: String, + port: u16, +} + +/// Root structure for worker-mapping.json +#[derive(Debug, Clone, Serialize, Deserialize)] +struct WorkerMapping { + workers: Vec, +} + +/// Load worker configuration from /etc/arrow-cache/worker-mapping.json if it exists +/// Returns None if file doesn't exist or can't be parsed +fn load_worker_config() -> Option> { + let config_path = "/etc/arrow-cache/worker-mapping.json"; + + info!("🔍 Checking for worker config file at: {}", config_path); + + match fs::read_to_string(config_path) { + Ok(content) => { + info!("✅ Successfully read 
worker config file"); + debug!("📄 Worker config file content: {}", content); + + match serde_json::from_str::(&content) { + Ok(mapping) => { + let workers: Vec = mapping.workers + .into_iter() + .map(|w| format!("http://{}:{}", w.host, w.port)) + .collect(); + + info!("🎯 Loaded {} workers from config file: {:?}", workers.len(), workers); + Some(workers) + } + Err(e) => { + error!("❌ Failed to parse worker config JSON: {}", e); + warn!("📝 Config file content was: {}", content); + None + } + } + } + Err(e) => { + info!("ℹ️ Worker config file not found or not readable: {}", e); + info!("🔄 Will fall back to environment variable construction"); + None + } + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt().init(); + + let args: Vec = std::env::args().collect(); + info!("🚀 Arguments passed to head: {:?}", &args[1..]); + + // Print all relevant environment variables for debugging + info!("🔍 Environment Variables:"); + if let Ok(runtime_env) = env::var("RUNTIME_ENV") { + info!(" RUNTIME_ENV = {}", runtime_env); + } else { + info!(" RUNTIME_ENV = "); + } + + if let Ok(lws_leader) = env::var("LWS_LEADER_ADDRESS") { + info!(" LWS_LEADER_ADDRESS = {}", lws_leader); + } + + if let Ok(lws_size) = env::var("LWS_GROUP_SIZE") { + info!(" LWS_GROUP_SIZE = {}", lws_size); + } + + if let Ok(worker_service) = env::var("WORKER_SERVICE_NAME") { + info!(" WORKER_SERVICE_NAME = {}", worker_service); + } + + let mut rpc_hosts = Vec::new(); + + // First, try to load worker configuration from config file + if let Some(config_workers) = load_worker_config() { + info!("✅ Using worker configuration from config file"); + rpc_hosts = config_workers; + } else if env::var("RUNTIME_ENV").is_ok() { + info!("🏠 Using localhost configuration (RUNTIME_ENV is set)"); + rpc_hosts.push(format!("{}:{}", "localhost", "50052")); + rpc_hosts.push(format!("{}:{}", "localhost", "50053")); + } else { + info!("🔧 Using LWS environment variable construction"); + + let 
lws_leader_address = env::var("LWS_LEADER_ADDRESS")?; + let lws_size: i32 = env::var("LWS_GROUP_SIZE")?.parse()?; + let rpc_port = 50051; + + info!("🎯 LWS Configuration:"); + info!(" Leader Address: {}", lws_leader_address); + info!(" Group Size: {}", lws_size); + info!(" RPC Port: {}", rpc_port); + + let service_tokens: Vec<&str> = lws_leader_address.split('.').collect(); + info!(" Service Tokens: {:?}", service_tokens); + + let _resolver = + TokioAsyncResolver::tokio(ResolverConfig::default(), ResolverOpts::default()); + + for i in 1..lws_size { + let host = format!( + "{}-{}.{}", + service_tokens[0], + i, + service_tokens[1..].join(".") + ); + + info!("🔗 Constructing worker host {}: {}", i, host); + + sleep(Duration::from_secs(10)).await; + + let worker_url = format!("http://{}:{}", host, rpc_port); + info!("➕ Adding worker: {}", worker_url); + rpc_hosts.push(worker_url); + } + } + + info!("🎯 Final RPC Hosts: {:?}", rpc_hosts); + let host = args.get(1).ok_or("Missing host argument")?; + let port = args.get(2).ok_or("Missing port argument")?; + + info!("🌐 Starting head service on {}:{}", host, port); + head::head_service::run(host, port, rpc_hosts).await +} diff --git a/pkg/data_cache/src/head/head.rs b/pkg/data_cache/src/head/head.rs new file mode 100644 index 0000000000..c269133a15 --- /dev/null +++ b/pkg/data_cache/src/head/head.rs @@ -0,0 +1,296 @@ +use super::config::config::CacheConfig; +use crate::head::provider::DataFileTableProvider; +use crate::head::writer::DistributedWriterExec; +use arrow::array::UInt64Array; +use arrow_schema::SchemaRef; +use datafusion::datasource::MemTable; +use datafusion::error::{DataFusionError, Result}; +use datafusion::physical_expr::Partitioning; +use datafusion::physical_plan::execute_stream; +use datafusion::physical_plan::repartition::RepartitionExec; +use datafusion::prelude::SessionContext; +use datafusion::sql::TableReference; +use futures::StreamExt; +use std::collections::HashMap; +use std::sync::Arc; +use 
std::time::Duration; +use tokio::time::interval; +use tracing::{error, info, warn}; + +pub struct Distributor { + ctx: Arc, + num_workers: usize, + data_file_provider: Arc, + mem_table_name: String, + worker_map: Arc>, + metadata_schema: SchemaRef, + pub(crate) total_row_count: i64, + config: Arc, + retry_task_handle: Option>, +} + +impl Distributor { + pub fn new( + ctx: Arc, + num_workers: usize, + data_file_provider: Arc, + mem_table_name: String, + worker_map: Arc>, + metadata_schema: SchemaRef, + config: Arc, + ) -> Self { + Self { + ctx, + num_workers, + data_file_provider, + mem_table_name, + worker_map, + metadata_schema, + total_row_count: 0, + config, + retry_task_handle: None, + } + } + + pub fn context(&self) -> Arc { + self.ctx.clone() + } + + pub async fn init(&mut self) -> Result<()> { + let _ = self.fetch_data_files().await; + let df = self + .ctx + .sql("select * from memtable") + .await? + .collect() + .await?; + let _ = arrow::util::pretty::print_batches(&df); + + let df = self + .ctx + .sql("SELECT MAX(row_end_indexes) AS max FROM memtable") + .await?; + let results = df.collect().await?; + + if let Some(batch) = results.first() { + let column = batch.column(0); + let max_value = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Execution("Failed to downcast to UInt64Array".to_string()) + })? 
+ .value(0); + self.total_row_count = (max_value + 1) as i64; + info!("Total num of rows: {}", self.total_row_count); + } + + self.distribute_data_files().await?; + + // Start periodic retry task if configured + self.start_periodic_retry_task().await; + + Ok(()) + } + + pub async fn get_workers_to_connect(&self, start: u64, end: u64) -> Result> { + info!("start: {}, end: {}", start, end); + let df = self.ctx.sql(format!("SELECT worker_ids FROM memtable WHERE row_start_indexes <= {} AND row_end_indexes >= {}", end, start).as_str()).await?; + let results = df.collect().await?; + let _ = arrow::util::pretty::print_batches(&results); + + let mut string_results: Vec = Vec::new(); + + for batch in results { + for row in 0..batch.num_rows() { + let mut row_string = String::new(); + let array = batch.column(0); + let value = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Execution("Failed to downcast to UInt64Array".to_string()) + })? + .value(row); + let url = self.worker_map.get(&value.to_string()).ok_or_else(|| { + DataFusionError::Execution(format!("Worker {} not found in worker map", value)) + })?; + row_string.push_str(url); + string_results.push(row_string); + } + } + Ok(string_results) + } + + pub async fn fetch_data_files(&mut self) -> Result<()> { + let memtable = MemTable::load( + self.data_file_provider.clone(), + Some(self.num_workers), + &self.ctx.state(), + ) + .await + .map_err(|err: DataFusionError| { + error!("Error loading table: {}", err); + err + })?; + self.ctx + .register_table(self.mem_table_name.clone(), Arc::new(memtable)) + .map_err(|err: DataFusionError| { + error!("Failed to register table: {}", err); + err + })?; + Ok(()) + } + + pub async fn distribute_and_setup(&mut self) -> Result<()> { + // Calculate total row count first + let df = self + .ctx + .sql("SELECT MAX(row_end_indexes) AS max FROM memtable") + .await?; + let results = df.collect().await?; + + if let Some(batch) = results.first() { + let column = 
batch.column(0); + let max_value = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Execution("Failed to downcast to UInt64Array".to_string()) + })? + .value(0); + self.total_row_count = (max_value + 1) as i64; + info!("Total num of rows: {}", self.total_row_count); + } + + // Distribute data to workers + self.distribute_data_files().await?; + + // Start periodic retry task if configured + self.start_periodic_retry_task().await; + + Ok(()) + } + + async fn distribute_data_files(&self) -> Result<()> { + let table = self + .ctx + .table_provider(TableReference::parse_str(&self.mem_table_name)) + .await + .map_err(|err: DataFusionError| { + error!("Error retrieving table: {}", err); + err + })?; + let plan = table.scan(&self.ctx.state(), None, &[], None).await?; + let plan = RepartitionExec::try_new(plan, Partitioning::RoundRobinBatch(self.num_workers))?; + let plan = DistributedWriterExec::new( + Arc::new(plan), + self.worker_map.clone(), + self.metadata_schema.clone(), + self.num_workers, + self.config.clone(), + ); + let _ = execute_stream(Arc::new(plan), self.ctx.task_ctx())? 
+ .collect::>() + .await; + Ok(()) + } + + /// Start a background task that periodically retries data distribution to any workers + /// that may have failed during initial distribution or restarted + async fn start_periodic_retry_task(&mut self) { + let retry_interval_seconds: u64 = + std::env::var("ARROW_CACHE_PERIODIC_RETRY_INTERVAL_SECONDS") + .unwrap_or_else(|_| "30".to_string()) // Default: retry every 30 seconds + .parse() + .unwrap_or(30); + + if retry_interval_seconds == 0 { + info!("Periodic retry disabled (ARROW_CACHE_PERIODIC_RETRY_INTERVAL_SECONDS=0)"); + return; + } + + info!( + "Starting periodic retry task (interval: {}s)", + retry_interval_seconds + ); + + let ctx = self.ctx.clone(); + let num_workers = self.num_workers; + let mem_table_name = self.mem_table_name.clone(); + let worker_map = self.worker_map.clone(); + let metadata_schema = self.metadata_schema.clone(); + let config = self.config.clone(); + + let handle = tokio::spawn(async move { + let mut retry_interval = interval(Duration::from_secs(retry_interval_seconds)); + retry_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + + loop { + retry_interval.tick().await; + + info!("Attempting periodic data redistribution to workers"); + + match Self::periodic_distribute_data_files( + &ctx, + num_workers, + &mem_table_name, + worker_map.clone(), + metadata_schema.clone(), + config.clone(), + ) + .await + { + Ok(_) => { + info!("Periodic data redistribution completed successfully"); + } + Err(e) => { + warn!("Periodic data redistribution failed: {}", e); + } + } + } + }); + + self.retry_task_handle = Some(handle); + } + + /// Periodic version of distribute_data_files that can be called from background task + async fn periodic_distribute_data_files( + ctx: &Arc, + num_workers: usize, + mem_table_name: &str, + worker_map: Arc>, + metadata_schema: SchemaRef, + config: Arc, + ) -> Result<()> { + let table = ctx + .table_provider(TableReference::parse_str(mem_table_name)) + 
.await + .map_err(|err: DataFusionError| { + error!("Error retrieving table for periodic retry: {}", err); + err + })?; + let plan = table.scan(&ctx.state(), None, &[], None).await?; + let plan = RepartitionExec::try_new(plan, Partitioning::RoundRobinBatch(num_workers))?; + let plan = DistributedWriterExec::new( + Arc::new(plan), + worker_map, + metadata_schema, + num_workers, + config, + ); + let _ = execute_stream(Arc::new(plan), ctx.task_ctx())? + .collect::>() + .await; + Ok(()) + } +} + +/// Cleanup implementation to properly shutdown retry task +impl Drop for Distributor { + fn drop(&mut self) { + if let Some(handle) = self.retry_task_handle.take() { + handle.abort(); + } + } +} diff --git a/pkg/data_cache/src/head/head_service.rs b/pkg/data_cache/src/head/head_service.rs new file mode 100644 index 0000000000..bddc817414 --- /dev/null +++ b/pkg/data_cache/src/head/head_service.rs @@ -0,0 +1,687 @@ +use crate::head::head::Distributor; +use crate::head::provider::DataFileTableProvider; +use arrow_flight::{ + Action, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, + HandshakeRequest, HandshakeResponse, Location, PollInfo, PutResult, SchemaResult, Ticket, + flight_service_server::{FlightService, FlightServiceServer}, +}; +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use bincode; +use bytes::Bytes; +use datafusion::prelude::SessionContext; +use futures::Stream; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::Arc; +use tonic::{Request, Response, Status, Streaming}; +use tracing::{info, error}; + +/// Head node service implementing Apache Arrow Flight protocol for distributed query coordination. +/// +/// **IMPORTANT**: This service handles **coordination metadata** only and does NOT +/// deal with actual data schemas. The head node uses a **metadata schema** for +/// worker coordination, while workers separately manage **data schemas** for +/// actual data processing. 
+/// +/// # Dual Schema Architecture - Head Node Coordination +/// +/// The distributed caching system uses **two completely separate Arrow schemas**: +/// +/// ## 1. **Metadata Schema** (Head Node - This Service): +/// - **Purpose**: Coordinate worker assignments and data distribution +/// - **Created by**: [`metadata_arrow_schema()`] function +/// - **Contains**: `worker_ids`, `row_start_indexes`, `row_end_indexes`, `file_paths` +/// - **Used for**: Flight protocol coordination, worker task distribution +/// - **Location**: Head node only (this service) +/// +/// ## 2. **Data Schema** (Worker Nodes): +/// - **Purpose**: Describe actual data structure being cached/queried +/// - **Created by**: Converting Iceberg schema to Arrow in [`WorkerDataSource`] +/// - **Contains**: Actual data columns (e.g., `id`, `user_id`, `event_type`, `timestamp`) +/// - **Used for**: Query execution, data processing, result generation +/// - **Location**: Worker nodes only +/// +/// # Schema Separation Benefits +/// +/// - **Modularity**: Head nodes don't need to understand data semantics +/// - **Performance**: Lightweight coordination without full data schema overhead +/// - **Evolution**: Data schemas can change independently of coordination logic +/// - **Scalability**: Head node coordination scales independently of data complexity +/// +/// # Architecture +/// +/// The head service operates as the coordinator in a head-worker distributed system: +/// - Receives flight information requests from clients +/// - Partitions data ranges across available worker nodes using **metadata schema** +/// - Distributes file assignments to worker nodes +/// - Provides flight endpoints for distributed query execution +/// - **Never handles actual data** - only coordination metadata +/// +/// # Flight Protocol Usage +/// +/// - **`get_flight_info`**: Provides flight information with worker endpoints (uses metadata schema) +/// - Other Flight methods are not currently implemented +/// +/// # 
Coordination Data Flow +/// +/// ```text +/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +/// │ Client │───▶│ HeadService │───▶│ Distributor │ +/// │(get_flight_info)│ │(metadata schema)│ │(metadata schema)│ +/// └─────────────────┘ └─────────────────┘ └─────────────────┘ +/// │ │ +/// ▼ ▼ +/// ┌─────────────────┐ ┌─────────────────┐ +/// │ Flight Endpoints│ │ Worker Nodes │ +/// │(metadata schema)│ │ (data schemas) │ +/// └─────────────────┘ └─────────────────┘ +/// ``` +/// +/// # Example Coordination vs Data +/// +/// **Head Node Metadata** (coordination only): +/// ```text +/// ┌────────────┬─────────────────┬───────────────┬─────────────────────────┐ +/// │ worker_ids │ row_start_index │ row_end_index │ file_paths │ +/// ├────────────┼─────────────────┼───────────────┼─────────────────────────┤ +/// │ 0 │ 0 │ 999 │ ["/data/part1.parquet"] │ +/// │ 1 │ 1000 │ 1999 │ ["/data/part2.parquet"] │ +/// └────────────┴─────────────────┴───────────────┴─────────────────────────┘ +/// ``` +/// +/// **Worker Node Data** (actual data structure): +/// ```text +/// ┌────────┬─────────┬────────────┬─────────────┬─────────────┐ +/// │ id │ user_id │ event_type │ timestamp │ cache_index │ +/// │ Int64 │ String │ String │ Timestamp │ UInt64 │ +/// └────────┴─────────┴────────────┴─────────────┴─────────────┘ +/// ``` +/// +/// # Performance Considerations +/// +/// - Partitions data to balance load across workers (using metadata schema) +/// - Uses efficient serialization for flight coordination metadata +/// - Maintains worker topology for optimal data distribution +/// - Supports dynamic worker scaling without data schema dependencies +/// - Lightweight coordination operations independent of data complexity +/// +/// # See Also +/// +/// ## Head Node Coordination (Metadata Schema): +/// - [`Distributor`]: Handles data distribution and worker coordination +/// - [`get_partition_range`]: Calculates data partitioning ranges +/// - [`IndexPair`]: Represents row 
ranges in flight tickets +/// - [`metadata_arrow_schema()`]: Creates the coordination metadata schema +/// +/// ## Worker Node Data Processing (Data Schema): +/// - [`WorkerDataSource`]: Manages data schemas for actual data processing +/// - [`WorkerService`]: Handles data queries using data schemas +pub struct HeadService { + distributor: Distributor, +} + +impl HeadService { + #[allow(dead_code)] + pub fn new(distributor: Distributor) -> Self { + Self { distributor } + } + + /// Try to get the total row count from the memtable if it exists + async fn get_total_row_count(&self) -> Result> { + use arrow::array::UInt64Array; + use datafusion::error::DataFusionError; + + let df = self.distributor.context() + .sql("SELECT MAX(row_end_indexes) AS max FROM memtable") + .await?; + let results = df.collect().await?; + + if let Some(batch) = results.first() { + let column = batch.column(0); + let max_value = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Execution("Failed to downcast to UInt64Array".to_string()) + })? + .value(0); + Ok((max_value + 1) as usize) + } else { + Err("No rows found in memtable".into()) + } + } +} + +#[tonic::async_trait] +impl FlightService for HeadService { + type HandshakeStream = + Pin> + Send + 'static>>; + async fn handshake( + &self, + _request: Request>, + ) -> Result, Status> { + todo!() + } + type ListFlightsStream = + Pin> + Send + 'static>>; + async fn list_flights( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } + /// Provides flight information for distributed query execution. + /// + /// This method handles client requests for flight information by partitioning + /// data ranges across available worker nodes and returning flight endpoints + /// that clients can use to query specific data ranges. 
+ /// + /// # Parameters + /// + /// - `request`: Flight descriptor containing partition information in the path + /// - `path[0]`: Local rank (partition ID) for the requesting client + /// - `path[1]`: Total number of partitions across all clients + /// + /// # Returns + /// + /// Returns [`FlightInfo`] containing: + /// - Flight endpoints with worker URIs for the requested partition + /// - Serialized [`IndexPair`] tickets containing row ranges + /// - Schema information for the distributed data + /// + /// # Algorithm + /// + /// 1. Parse local rank and total partitions from the flight descriptor + /// 2. Calculate data partition range using [`get_partition_range`] + /// 3. Get worker nodes responsible for the partition range + /// 4. Create flight endpoints with worker locations and row range tickets + /// 5. Return flight information with schema and endpoints + /// + /// # Data Partitioning + /// + /// Data is partitioned evenly across the requested number of partitions: + /// - Each partition gets approximately `total_rows / num_partitions` rows + /// - Partition ranges are calculated to avoid gaps or overlaps + /// - Empty partitions are handled gracefully + /// + /// # Error Handling + /// + /// - Returns `Status::invalid_argument` if path parameters are missing + /// - Returns `Status::invalid_argument` if parameters cannot be parsed + /// - Returns `Status::internal` if worker lookup fails + /// - Returns `Status::internal` if serialization fails + /// + /// # Example Usage + /// + /// For a client requesting partition 0 of 4 total partitions: + /// ``` + /// path = ["0", "4"] + /// // Returns flight endpoints for rows 0 to (total_rows/4 - 1) + /// ``` + /// + /// # Performance Considerations + /// + /// - Uses efficient binary serialization for flight metadata + /// - Minimizes network communication by providing direct worker endpoints + /// - Balances load across available workers + /// - Supports parallel query execution across partitions + /// + /// # 
See Also + /// + /// - [`get_partition_range`]: Calculates partition boundaries + /// - [`Distributor::get_workers_to_connect`]: Finds responsible workers + /// - [`IndexPair`]: Row range representation in tickets + async fn get_flight_info( + &self, + request: Request, + ) -> Result, Status> { + let request = request.into_inner(); + let local_rank = request + .path + .first() + .ok_or_else(|| Status::invalid_argument("Missing local_rank in path"))?; + let total = request + .path + .get(1) + .ok_or_else(|| Status::invalid_argument("Missing total in path"))?; + let mut pair = IndexPair { start: 0, end: 0 }; + let total_parsed = total + .parse() + .map_err(|_| Status::invalid_argument("Invalid total value"))?; + let local_rank_parsed = local_rank + .parse() + .map_err(|_| Status::invalid_argument("Invalid local_rank value"))?; + + // Get the current total row count (may need to initialize it) + let total_row_count = if self.distributor.total_row_count == 0 { + // Try to get row count from memtable if available + match self.get_total_row_count().await { + Ok(count) => count, + Err(_) => { + info!("Total row count not yet available, returning empty endpoints"); + 0 + } + } + } else { + self.distributor.total_row_count as usize + }; + + let workers = if let Some((start, end)) = get_partition_range( + total_row_count, + total_parsed, + local_rank_parsed, + ) { + //TODO: fetch total count + pair = IndexPair { start, end }; + self.distributor.get_workers_to_connect(start, end).await + } else { + Ok(Vec::new()) + }; + let mut endpoints = vec![]; + for uri in workers.map_err(|e| Status::internal(format!("Error getting workers: {}", e)))? 
{ + endpoints.push(FlightEndpoint { + ticket: Some(Ticket::new(Bytes::from( + bincode::serialize(&pair) + .map_err(|e| Status::internal(format!("Serialization error: {}", e)))?, + ))), + location: vec![Location { uri }], + expiration_time: None, + app_metadata: Bytes::from( + bincode::serialize(&pair) + .map_err(|e| Status::internal(format!("Serialization error: {}", e)))?, + ), + }); + } + + let flight_info = FlightInfo { + schema: Bytes::new(), + flight_descriptor: Some(request), + endpoint: endpoints, + total_records: -1, + total_bytes: -1, + ordered: false, + app_metadata: Default::default(), + }; + + let flight_info = flight_info + .try_with_schema(metadata_arrow_schema().as_ref()) + .map_err(|e| Status::internal(format!("Schema error: {}", e)))?; // TODO:// pass correct schema + Ok(Response::new(flight_info)) + } + async fn poll_flight_info( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } + async fn get_schema( + &self, + _request: Request, + ) -> Result, Status> { + unimplemented!() + } + + type DoGetStream = Pin> + Send + 'static>>; + + async fn do_get( + &self, + _request: Request, + ) -> Result::DoGetStream>, Status> { + todo!() + } + + type DoPutStream = Pin> + Send + 'static>>; + + async fn do_put( + &self, + _request: Request>, + ) -> Result, Status> { + todo!() + } + + type DoExchangeStream = + Pin> + Send + 'static>>; + + async fn do_exchange( + &self, + _request: Request>, + ) -> Result, Status> { + todo!() + } + + type DoActionStream = + Pin> + Send + 'static>>; + + async fn do_action( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } + + type ListActionsStream = + Pin> + Send + 'static>>; + + async fn list_actions( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } +} + +use super::config::config::CacheConfig; +use serde::{Deserialize, Serialize}; + +/// Represents a row range for distributed query execution. 
+/// +/// This structure is used to communicate row ranges between the head node and +/// worker nodes in the distributed caching system. It is serialized into +/// flight tickets and application metadata for efficient communication. +/// +/// # Fields +/// +/// - `start`: Starting row index (inclusive) +/// - `end`: Ending row index (inclusive) +/// +/// # Serialization +/// +/// The struct is serialized using `bincode` for efficient binary representation +/// in flight tickets and metadata. This enables fast serialization/deserialization +/// across network boundaries. +/// +/// # Usage +/// +/// ```rust +/// let range = IndexPair { start: 0, end: 999 }; +/// // Represents rows 0 through 999 (1000 rows total) +/// ``` +/// +/// # See Also +/// +/// - [`get_partition_range`]: Creates partition ranges that are converted to IndexPair +/// - [`get_flight_info`]: Uses IndexPair in flight tickets +/// - [`do_get`]: Deserializes IndexPair from tickets for query execution +#[derive(Serialize, Deserialize, Debug)] +struct IndexPair { + start: u64, + end: u64, +} + +pub async fn run( + host: &String, + port: &String, + workers: Vec, +) -> datafusion::common::Result<(), Box> { + let ctx = Arc::new(SessionContext::new()); + let addr = format!("{host}:{port}").parse()?; + let num_workers = workers.len(); + let cache_config = CacheConfig::shared_from_env() + .map_err(|e| format!("Failed to load dataset config: {}", e))?; + let metadata_schema = metadata_arrow_schema(); + info!( + "Creating DataFileTableProvider with schema: {:?}", + metadata_schema + ); + let provider = DataFileTableProvider::new( + &cache_config.dataset.metadata_loc, + &cache_config.dataset.table_name, + &cache_config.dataset.schema_name, + metadata_schema.clone(), + num_workers, + ) + .await + .map_err(|e| format!("Failed to create provider: {}", e))?; + let mut worker_map: HashMap = HashMap::new(); + for (index, worker_uri) in workers.into_iter().enumerate() { + // Strip http:// prefix if present before 
adding grpc:// prefix + let clean_uri = worker_uri.strip_prefix("http://").unwrap_or(&worker_uri); + worker_map.insert(index.to_string(), format!("grpc://{clean_uri}")); + } + let provider_arc = Arc::new(provider); + let worker_map_arc = Arc::new(worker_map); + + let mut distributor = Distributor::new( + ctx.clone(), + num_workers, + provider_arc.clone(), + "memtable".to_string(), + worker_map_arc.clone(), + metadata_schema.clone(), + cache_config.clone(), + ); + + // Only do minimal initialization (fetch data files) without distributing + info!("🚀 Starting minimal initialization (gRPC server will start immediately)"); + let _ = distributor.fetch_data_files().await; + info!("✅ Minimal initialization completed, starting gRPC server"); + + // Start background distribution task + tokio::spawn(async move { + info!("🚀 Starting background data distribution"); + + // Add delay to allow gRPC server to start and workers to be ready + tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + + match distributor.distribute_and_setup().await { + Ok(_) => { + info!("✅ Background data distribution completed successfully"); + } + Err(e) => { + error!("❌ Background data distribution failed: {}", e); + // Continue running even if distribution fails - periodic retries will handle it + } + } + }); + + info!("🌐 Starting gRPC server on {} (data distribution will happen in background)", addr); + let mut service_distributor = Distributor::new( + ctx, + num_workers, + provider_arc, + "memtable".to_string(), + worker_map_arc, + metadata_schema, + cache_config, + ); + + // Initialize the service distributor's memtable so it can handle flight info requests + let _ = service_distributor.fetch_data_files().await; + + let service = HeadService { distributor: service_distributor }; + tonic::transport::Server::builder() + .add_service(FlightServiceServer::new(service)) + .serve(addr) + .await + .map_err(|e| format!("Error starting server: {}", e))?; + Ok(()) +} + +/// Creates the Arrow 
schema for **coordination metadata** in the distributed caching system. +/// +/// **IMPORTANT**: This function creates the **metadata schema** used for coordination +/// between the head node and worker nodes. This is **NOT** the data schema that describes +/// the actual data being processed. The data schema is handled separately by worker nodes +/// and is converted from Iceberg format to Arrow format in [`WorkerDataSource`]. +/// +/// # Dual Schema Architecture +/// +/// The distributed caching system uses **two distinct Arrow schemas**: +/// +/// 1. **Metadata Schema** (this function): +/// - Used by head node for worker coordination +/// - Contains worker assignments and file distribution information +/// - Transmitted via Arrow Flight for system coordination +/// +/// 2. **Data Schema** (in workers): +/// - Describes the structure of actual data being cached/queried +/// - Converted from Iceberg table metadata to Arrow format +/// - Enhanced with `cache_index` column for efficient indexing +/// - See [`WorkerDataSource::table_schema`] and [`WorkerDataSource::output_schema`] +/// +/// # Metadata Schema Fields +/// +/// The coordination metadata schema contains four essential fields: +/// +/// - **`worker_ids`** (`UInt64`, non-nullable): Unique identifiers for worker nodes +/// responsible for processing specific data ranges. Used for routing queries +/// to the appropriate workers. +/// +/// - **`row_start_indexes`** (`UInt64`, non-nullable): Starting row indices for +/// data ranges assigned to each worker. Defines the beginning of each worker's +/// data partition (inclusive). +/// +/// - **`row_end_indexes`** (`UInt64`, non-nullable): Ending row indices for +/// data ranges assigned to each worker. Defines the end of each worker's +/// data partition (inclusive). +/// +/// - **`file_paths`** (`List`, non-nullable): List of file paths +/// that each worker is responsible for processing. Supports multiple files +/// per worker for efficient data distribution. 
+/// +/// # Returns +/// +/// Returns an [`SchemaRef`] (reference-counted Arrow schema) that can be: +/// - Used in flight information responses for coordination +/// - Shared across multiple head node components without cloning +/// - Passed to DataFusion table providers for metadata operations +/// - Serialized in Apache Arrow Flight protocol messages +/// +/// # Usage in Distributed System +/// +/// This **metadata schema** is used throughout the head node coordination layer: +/// 1. **Flight Information**: Attached to flight responses for schema validation +/// 2. **Worker Communication**: Defines the structure of coordination exchanges +/// 3. **Query Planning**: Used by DataFusion for distribution planning +/// 4. **Data Distribution**: Describes how data is partitioned across workers +/// +/// # Example Coordination Metadata Structure +/// +/// ```text +/// ┌────────────┬─────────────────┬───────────────┬─────────────────────────┐ +/// │ worker_ids │ row_start_index │ row_end_index │ file_paths │ +/// ├────────────┼─────────────────┼───────────────┼─────────────────────────┤ +/// │ 0 │ 0 │ 999 │ ["/data/part1.parquet"] │ +/// │ 1 │ 1000 │ 1999 │ ["/data/part2.parquet"] │ +/// │ 2 │ 2000 │ 2999 │ ["/data/part3.parquet"] │ +/// └────────────┴─────────────────┴───────────────┴─────────────────────────┘ +/// ``` +/// +/// # Schema Separation Rationale +/// +/// **Why separate metadata and data schemas?** +/// - **Separation of Concerns**: Coordination logic is independent of data structure +/// - **Schema Evolution**: Data schema can evolve without affecting coordination +/// - **Performance**: Lightweight metadata operations don't need full data schema +/// - **Modularity**: Head nodes don't need to understand data semantics +/// +/// # Schema Compatibility +/// +/// - Uses `UInt64` for row indices to support large datasets (up to 2^64 rows) +/// - Uses `Utf8View` for efficient string storage and reduced memory footprint +/// - Non-nullable fields ensure data 
integrity across the distributed system +/// - List type supports variable numbers of files per worker +/// +/// # Performance Considerations +/// +/// - Schema is created once and reused via `Arc` for efficiency +/// - `Utf8View` provides zero-copy string operations for file paths +/// - Minimal schema overhead for coordination communication +/// - Compatible with Arrow's columnar format for fast serialization +/// +/// # See Also +/// +/// ## Metadata Schema Usage: +/// - [`DataFileTableProvider`]: Uses this schema for coordination table registration +/// - [`get_flight_info`]: Attaches this schema to flight responses +/// - [`Distributor`]: Uses this schema for metadata operations +/// +/// ## Data Schema Usage (Worker Side): +/// - [`WorkerDataSource::table_schema`]: Original data schema from Iceberg → Arrow +/// - [`WorkerDataSource::output_schema`]: Data schema + cache_index column +/// - [`iceberg::arrow::schema_to_arrow_schema`]: Converts Iceberg schema to Arrow +fn metadata_arrow_schema() -> SchemaRef { + let columns = vec![ + Field::new("worker_ids", DataType::UInt64, false), + Field::new("row_start_indexes", DataType::UInt64, false), + Field::new("row_end_indexes", DataType::UInt64, false), + Field::new( + "file_paths", + DataType::List(Arc::new(Field::new("item", DataType::Utf8View, true))), + false, + ), + ]; + Arc::new(Schema::new(columns)) +} + +/// Calculates the row range for a specific partition in distributed query execution. +/// +/// This function divides the total data count evenly across the requested number +/// of partitions and returns the start and end row indices for the specified +/// partition. It handles edge cases like empty datasets and ensures no gaps +/// or overlaps between partitions. 
/// Returns the inclusive row range `[start, end]` owned by one partition when
/// `total_count` rows are split across `num_partitions` partitions.
///
/// Rows are handed out in contiguous chunks of `ceil(total_count / num_partitions)`
/// rows, so ranges never overlap and leave no gaps; the last non-empty
/// partition may be shorter than the others.
///
/// # Parameters
///
/// - `total_count`: total number of rows in the dataset
/// - `num_partitions`: number of partitions to divide the rows into
/// - `partition_id`: zero-based partition whose range is requested
///
/// # Returns
///
/// `Some((start, end))` with both bounds inclusive, or `None` when:
/// - `total_count` is 0,
/// - `num_partitions` is 0,
/// - `partition_id >= num_partitions`, or
/// - the partition would start at or past `total_count` (this happens for
///   trailing partitions when there are more partitions than chunks).
///
/// # Examples
///
/// ```rust
/// // 100 rows, 4 partitions
/// assert_eq!(get_partition_range(100, 4, 0), Some((0, 24)));
/// assert_eq!(get_partition_range(100, 4, 3), Some((75, 99)));
///
/// // Uneven split: the last partition is shorter
/// assert_eq!(get_partition_range(10, 3, 2), Some((8, 9)));
///
/// // Edge cases
/// assert_eq!(get_partition_range(0, 4, 0), None);
/// assert_eq!(get_partition_range(100, 4, 4), None);
/// ```
fn get_partition_range(
    total_count: usize,
    num_partitions: usize,
    partition_id: usize,
) -> Option<(u64, u64)> {
    if total_count == 0 || num_partitions == 0 || partition_id >= num_partitions {
        return None;
    }

    let rows_per_partition = total_count.div_ceil(num_partitions);

    // A start offset that does not even fit in `usize` is necessarily past the
    // end of the dataset, so overflow maps to "no rows" instead of wrapping.
    let start = partition_id.checked_mul(rows_per_partition)?;
    if start >= total_count {
        return None;
    }

    // Saturate before clamping: for row counts near `usize::MAX` the plain sum
    // would overflow (panic in debug builds, wrap to a bogus range in release).
    let end_exclusive = start.saturating_add(rows_per_partition).min(total_count);

    // `end_exclusive > start >= 0` here, so the subtraction cannot underflow.
    Some((start as u64, (end_exclusive - 1) as u64))
}
+/// +/// This table provider implements the head node functionality in a distributed +/// Arrow-based caching system. It loads Iceberg table metadata and partitions +/// data files across worker nodes for parallel processing. +/// +/// # Architecture +/// +/// The provider uses a head-worker architecture where: +/// - Head node (this provider) coordinates data distribution +/// - Worker nodes execute queries on partitioned data subsets +/// - Data is partitioned based on file scan tasks and record counts +/// +/// # Data Flow +/// +/// ```text +/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +/// │ Iceberg Table │───▶│ DataFileTable │───▶│ DataFileTable │ +/// │ Metadata │ │ Provider │ │ Exec │ +/// └─────────────────┘ └─────────────────┘ └─────────────────┘ +/// │ │ +/// ▼ ▼ +/// ┌─────────────────┐ ┌─────────────────┐ +/// │ Partition │ │ Worker Task │ +/// │ Planning │ │ Distribution │ +/// └─────────────────┘ └─────────────────┘ +/// ``` +/// +/// # Performance Considerations +/// +/// - Files are sorted by record count and distributed using a greedy algorithm +/// - Task groups are balanced to minimize data skew across workers +/// - Memory usage scales with the number of files and partitions +/// +/// # See Also +/// +/// - [`DataFileTableExec`]: The execution plan produced by this provider +/// - [`TaskGroup`]: Groups of file scan tasks assigned to worker nodes +#[derive(Debug, Clone)] +pub struct DataFileTableProvider { + inner: Table, + schema: SchemaRef, + num_workers: usize, +} +impl DataFileTableProvider { + pub async fn new( + metadata_loc: &String, + table_name: &String, + schema_name: &String, + metadata_schema: SchemaRef, + num_workers: usize, + ) -> Result> { + info!( + "DataFileTableProvider::new called with metadata_schema: {:?}", + metadata_schema + ); + let file_io = FileIO::from_path(metadata_loc) + .map_err(|e| format!("Failed to create FileIO: {}", e))? 
+ .build() + .map_err(|e| format!("Failed to build FileIO: {}", e))?; + let table_indent = TableIdent::from_strs([schema_name, table_name]) + .map_err(|e| format!("Failed to create table ident: {}", e))?; + let static_table = StaticTable::from_metadata_file(metadata_loc, table_indent, file_io) + .await + .map_err(|e| format!("Failed to load static table: {}", e))?; + let table = static_table.into_table(); + info!("Iceberg table loaded successfully"); + // Use the provided metadata schema instead of Iceberg table schema + let provider = Self { + inner: table, + schema: metadata_schema.clone(), // This should be the metadata schema (worker_ids, row_start_indexes, etc.) + num_workers, + }; + info!( + "DataFileTableProvider created with schema: {:?}", + provider.schema + ); + Ok(provider) + } +} +#[async_trait] +impl TableProvider for DataFileTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + info!( + "DataFileTableProvider returning metadata schema: {:?}", + self.schema + ); + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + _projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + let tablescan = self.inner.scan().build().map_err(|e| { + DataFusionError::Execution(format!("Failed to build table scan: {}", e)) + })?; + + // Try to get file scan tasks from Iceberg metadata + let file_scan_task_stream = tablescan + .plan_files() + .await + .map_err(|e| DataFusionError::Execution(format!("Failed to plan files: {}", e)))?; + let partitions = partition_tasks(file_scan_task_stream, self.num_workers) + .await + .map_err(|e| DataFusionError::Execution(format!("Failed to partition tasks: {}", e)))?; + + info!("Partition count: {}", partitions.len()); + info!( + "Total tasks across all partitions: {}", + partitions.iter().map(|p| p.tasks.len()).sum::() + ); + + // If we got empty partitions (no file scan tasks), 
create fallback tasks + let partitions = if partitions.iter().all(|p| p.tasks.is_empty()) { + info!("No file scan tasks found in Iceberg metadata, creating fallback tasks"); + create_fallback_partitions(self.num_workers) + .await + .map_err(|e| { + DataFusionError::Execution(format!( + "Failed to create fallback partitions: {}", + e + )) + })? + } else { + partitions + }; + + Ok(Arc::new(DataFileTableExec::new( + self.schema.clone(), + partitions, + ))) + } +} + +/// Execution plan for distributed data file processing in Arrow caching system. +/// +/// This execution plan coordinates the distribution of file scan tasks across +/// multiple worker nodes and produces RecordBatches containing worker assignment +/// information and file paths for distributed processing. +/// +/// # Algorithm +/// +/// 1. Load file scan tasks from Iceberg table metadata +/// 2. Partition tasks across worker nodes using greedy load balancing +/// 3. Generate RecordBatches with worker assignments and file paths +/// 4. 
Each partition contains tasks for a specific worker node +/// +/// # Output Schema +/// +/// The execution plan produces RecordBatches with the following columns: +/// - `worker_ids`: Array of worker node identifiers +/// - `row_start_indexes`: Starting row indices for each task group +/// - `row_end_indexes`: Ending row indices for each task group +/// - `file_paths`: Lists of file paths assigned to each worker +/// +/// # Partitioning Strategy +/// +/// Tasks are distributed across workers using a greedy algorithm: +/// - Sort tasks by record count (descending) +/// - Assign each task to the worker with the least total records +/// - This minimizes data skew and balances workload +/// +/// # Performance Considerations +/// +/// - Each partition corresponds to one worker node +/// - Task assignment is computed once during planning +/// - Memory usage is proportional to number of files and workers +/// - No network communication during execution (coordination only) +/// +/// # See Also +/// +/// - [`DataFileTableProvider`]: The table provider that creates this execution plan +/// - [`TaskGroup`]: Groups of file scan tasks assigned to worker nodes +/// - [`RecordBatchBuilder`]: Builds output RecordBatches with task assignments +#[derive(Debug, Clone)] +pub struct DataFileTableExec { + _projection: Option>, + _predicates: Option, + schema: SchemaRef, + partitions: Arc>, + plan_properties: PlanProperties, +} + +impl DataFileTableExec { + fn new(schema: SchemaRef, partitions: Arc>) -> Self { + // TODO:// revisit plan_properties + let eq_properties = EquivalenceProperties::new_with_orderings(schema.clone(), &[]); + let plan_properties = PlanProperties::new( + eq_properties, + Partitioning::UnknownPartitioning(partitions.len()), // TODO:// address partitioning during scan + EmissionType::Both, + Boundedness::Bounded, + ); + Self { + _projection: None, + _predicates: None, + schema: schema.clone(), + partitions, + plan_properties, + } + } +} + +impl DisplayAs for 
DataFileTableExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "DataFileTableExec: partitions={}", self.partitions.len()) + } + DisplayFormatType::TreeRender => { + write!(f, "partitions={}", self.partitions.len()) + } + } + } +} + +#[async_trait] +impl ExecutionPlan for DataFileTableExec { + fn name(&self) -> &str { + "DataFileTableExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc<(dyn ExecutionPlan + 'static)>> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + let mut builder = RecordBatchBuilder::new(); + // for (index, item) in self.partitions.iter().enumerate() { //support partitions into multiple batches + // builder.add_task(index, item); + // } + if _partition < self.partitions.len() { + builder.add_task( + _partition, + self.partitions.get(_partition).ok_or_else(|| { + DataFusionError::Execution(format!("Partition {} not found", _partition)) + })?, + )?; + let record_batch_stream = builder.build(); + create_multi_batch_stream(self.schema.clone(), vec![record_batch_stream]) + } else { + Ok(Box::pin(EmptyRecordBatchStream::new(self.schema.clone()))) + } + } +} + +fn create_multi_batch_stream( + schema: SchemaRef, + batches: Vec>, +) -> Result { + let stream = iter(batches); + Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) +} +#[derive(Debug, Default)] +struct RecordBatchBuilder { + worker_ids: Vec, + file_paths: Vec>, + row_start_indexes: Vec, + row_end_indexes: Vec, +} + +impl RecordBatchBuilder { + fn new() -> Self { + Self::default() + } + fn add_task(&mut self, index: usize, group: &TaskGroup) -> Result<()> { + if group.tasks.is_empty() { + return Ok(()); + } + let mut 
_row_count = 0; + let mut file_paths = Vec::new(); + for task in group.tasks.iter() { + file_paths.push(task.data_file_path.clone()); + _row_count += task.record_count.ok_or_else(|| { + DataFusionError::Execution("Task record count is None".to_string()) + })? //check if filescantask doesnt have record_count. IcebergFileScan always includes complete datafile + } + self.file_paths.push(file_paths); + self.row_start_indexes.push(group.start_index as u64); + self.row_end_indexes.push(group.end_index as u64); + self.worker_ids.push(index as u64); + Ok(()) + } + + fn build(self) -> Result { + let worker_ids: ArrayRef = Arc::new(UInt64Array::from(self.worker_ids)); + let row_start_indexes: ArrayRef = Arc::new(UInt64Array::from(self.row_start_indexes)); + let row_end_indexes: ArrayRef = Arc::new(UInt64Array::from(self.row_end_indexes)); + + let worker_list_builder = StringViewBuilder::new(); + let mut file_paths_builder = + GenericListBuilder::::new(worker_list_builder); + + debug!("file_paths: {:?}", self.file_paths); + + for paths in self.file_paths { + for path in paths { + file_paths_builder.values().append_value(path); + } + file_paths_builder.append(true) + } + + let file_paths = Arc::new(file_paths_builder.finish()); + + // Log the actual data types being created + info!("Creating RecordBatch with types:"); + info!(" worker_ids: {:?}", worker_ids.data_type()); + info!(" row_start_indexes: {:?}", row_start_indexes.data_type()); + info!(" row_end_indexes: {:?}", row_end_indexes.data_type()); + info!(" file_paths: {:?}", file_paths.data_type()); + + let rb = RecordBatch::try_from_iter(vec![ + ("worker_ids", worker_ids), + ("row_start_indexes", row_start_indexes), + ("row_end_indexes", row_end_indexes), + ("file_paths", file_paths), + ])?; + + info!("RecordBatch schema: {:?}", rb.schema()); + Ok(rb) + } +} + +/// Groups of file scan tasks assigned to a specific worker node in the distributed system. 
+/// +/// Each TaskGroup represents a logical partition of work that will be executed by +/// a single worker node. Tasks are grouped to balance workload and minimize data +/// skew across the distributed system. +/// +/// # Fields +/// +/// - `tasks`: File scan tasks assigned to this worker +/// - `start_index`: Global starting row index for this task group +/// - `end_index`: Global ending row index for this task group +/// +/// # Row Index Calculation +/// +/// Row indices provide a global ordering across all task groups: +/// - Group 0: rows 0 to (count-1) +/// - Group 1: rows count to (count + next_count - 1) +/// - And so on... +/// +/// This enables consistent row numbering across distributed workers. +#[derive(Debug, Clone)] +struct TaskGroup { + tasks: Vec, + start_index: usize, + end_index: usize, +} + +/// Creates fallback partitions with mock file scan tasks when Iceberg metadata is incomplete. +/// +/// This function is used when the Iceberg metadata doesn't contain snapshots or +/// file references, but we know that data files exist in S3. It creates mock +/// FileScanTask objects pointing to the known S3 data files. 
+async fn create_fallback_partitions( + num_groups: usize, +) -> Result>, Box> { + use iceberg::spec::DataFileFormat; + use std::env; + + info!("Creating fallback partitions for {} workers", num_groups); + + // Make known S3 data files configurable via env var + // ARROW_CACHE_FALLBACK_FILES accepts a comma-separated list of entries: + // - Either just the URL: s3://bucket/path/file.parquet (uses default rows) + // - Or URL with estimated rows: s3://bucket/path/file.parquet:1000 + // Defaults: + let default_rows: u64 = env::var("ARROW_CACHE_FALLBACK_DEFAULT_ROWS") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(1000); + + let known_files: Vec<(String, u64)> = match env::var("ARROW_CACHE_FALLBACK_FILES") { + Ok(val) if !val.trim().is_empty() => { + let mut out = Vec::new(); + for entry in val.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()) { + if let Some((url, rows_str)) = entry.rsplit_once(':') { + let rows = rows_str.parse::().unwrap_or(default_rows); + out.push((url.to_string(), rows)); + } else { + out.push((entry.to_string(), default_rows)); + } + } + out + } + _ => { + info!("ARROW_CACHE_FALLBACK_FILES not set; using built-in demo files"); + vec![ + ( + "s3://ricardometadata/data/data_000.parquet".to_string(), + default_rows, + ), + ( + "s3://ricardometadata/data/data_001.parquet".to_string(), + default_rows, + ), + ] + } + }; + + info!("Fallback known files: {:?}", known_files); + + let mut mock_tasks = Vec::new(); + + for (file_path, estimated_records) in known_files { + let mock_task = FileScanTask { + start: 0, + length: estimated_records, + record_count: Some(estimated_records), + data_file_path: file_path.to_string(), + schema: create_mock_iceberg_schema(), + project_field_ids: vec![], + predicate: None, + data_file_format: DataFileFormat::Parquet, + deletes: vec![], + }; + mock_tasks.push(mock_task); + info!( + "Created mock task for file: {} ({} estimated records)", + file_path, estimated_records + ); + } + + // Now partition these mock 
tasks using the existing algorithm + let mut groups: Vec = vec![ + TaskGroup { + tasks: Vec::new(), + start_index: 0, + end_index: 0 + }; + num_groups + ]; + let mut group_sizes: Vec = vec![0; num_groups]; + + mock_tasks.sort_by_key(|task| std::cmp::Reverse(task.record_count)); + + for task in mock_tasks { + let min_group_index = group_sizes + .iter() + .enumerate() + .min_by_key(|&(_, size)| size) + .map(|(index, _)| index) + .ok_or_else(|| "Failed to find minimum group")?; + + let record_count = task + .record_count + .ok_or_else(|| "Task record count is None")? as usize; + + group_sizes[min_group_index] += record_count; + groups[min_group_index].tasks.push(task); + } + + let mut end = 0; + for (i, elem) in groups.iter_mut().enumerate() { + let start = end; + end += group_sizes[i]; + elem.start_index = start; + elem.end_index = if end > 0 { end - 1 } else { 0 }; + info!( + "Fallback group {}: {} tasks, rows {}-{} ({} total)", + i, + elem.tasks.len(), + elem.start_index, + elem.end_index, + group_sizes[i] + ); + } + + Ok(Arc::new(groups)) +} + +/// Creates a mock Iceberg schema for fallback file scan tasks. 
+fn create_mock_iceberg_schema() -> Arc { + use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; + + // Create fields that match the actual S3 data files + let fields = vec![ + Arc::new(NestedField::required( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::required( + 2, + "user_id", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::required( + 3, + "event_type", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::required( + 4, + "timestamp", + Type::Primitive(PrimitiveType::Timestamptz), + )), + ]; + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(fields) + .build() + .expect("Failed to create mock Iceberg schema"); + + Arc::new(schema) +} + +async fn partition_tasks( + mut stream: FileScanTaskStream, + num_groups: usize, +) -> Result>, Box> { + let mut tasks: Vec = Vec::new(); + while let Some(result) = stream.next().await { + match result { + Ok(task) => { + tasks.push(task); + } + Err(_e) => { + // Handle the error + } + } + } + + let mut groups: Vec = vec![ + TaskGroup { + tasks: Vec::new(), + start_index: 0, + end_index: 0 + }; + num_groups + ]; + let mut group_sizes: Vec = vec![0; num_groups]; + + tasks.sort_by_key(|task| std::cmp::Reverse(task.record_count)); + + for task in tasks { + let min_group_index = group_sizes + .iter() + .enumerate() + .min_by_key(|&(_, size)| size) + .map(|(index, _)| index) + .ok_or_else(|| format!("Failed to find minimum group"))?; + + let record_count = + task.record_count + .ok_or_else(|| format!("Task record count is None"))? 
as usize; + + group_sizes[min_group_index] += record_count; + groups[min_group_index].tasks.push(task); + } + + let mut end = 0; + for (i, elem) in groups.iter_mut().enumerate() { + let start = end; + end += group_sizes[i]; + elem.start_index = start; + elem.end_index = if end > 0 { end - 1 } else { 0 }; + } + Ok(Arc::new(groups)) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion::prelude::SessionContext; + use iceberg::spec::DataFileFormat; + + fn create_test_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("worker_ids", DataType::UInt64, false), + Field::new("row_start_indexes", DataType::UInt64, false), + Field::new("row_end_indexes", DataType::UInt64, false), + Field::new( + "file_paths", + DataType::List(Arc::new(Field::new("item", DataType::Utf8View, true))), + false, + ), + ])) + } + + fn create_iceberg_schema() -> Arc { + Arc::new( + iceberg::spec::Schema::builder() + .with_schema_id(1) + .build() + .unwrap(), + ) + } + + fn create_test_file_scan_task_with_record_count( + record_count: u64, + file_path: &str, + ) -> FileScanTask { + FileScanTask { + start: 0, + length: record_count, + record_count: Some(record_count), + data_file_path: String::from(file_path), + schema: create_iceberg_schema(), + project_field_ids: vec![], + predicate: None, + data_file_format: DataFileFormat::Parquet, + deletes: vec![], + } + } + + #[tokio::test] + async fn test_data_file_table_exec_invalid_partition() -> Result<(), Box> + { + let schema = create_test_schema(); + let partitions = Arc::new(vec![]); + + let exec = DataFileTableExec::new(schema.clone(), partitions); + let session_ctx = SessionContext::new(); + let task_ctx = session_ctx.task_ctx(); + + let result = exec.execute(0, task_ctx); + assert!(result.is_ok()); + Ok(()) + } + + #[tokio::test] + async fn test_partition_tasks_empty_stream() -> Result<(), Box> { + use futures::stream; + + let empty_stream = stream::empty(); + let result = 
partition_tasks(Box::pin(empty_stream), 2).await?; + + assert_eq!(result.len(), 2); + assert!(result[0].tasks.is_empty()); + assert!(result[1].tasks.is_empty()); + assert_eq!(result[0].start_index, 0); + assert_eq!(result[0].end_index, 0); + assert_eq!(result[1].start_index, 0); + assert_eq!(result[1].end_index, 0); + + Ok(()) + } + + #[tokio::test] + async fn test_partition_tasks_with_stream_errors() -> Result<(), Box> { + use futures::stream; + + // Test the error handling path in partition_tasks + let mixed_stream = stream::iter(vec![ + Err(iceberg::Error::new( + iceberg::ErrorKind::DataInvalid, + "Test error 1", + )), + Err(iceberg::Error::new( + iceberg::ErrorKind::DataInvalid, + "Test error 2", + )), + ]); + + let result = partition_tasks(Box::pin(mixed_stream), 2).await?; + + assert_eq!(result.len(), 2); + assert!(result[0].tasks.is_empty()); + assert!(result[1].tasks.is_empty()); + + Ok(()) + } + #[tokio::test] + async fn test_partition_tasks_validates_start_end_indices_empty() + -> Result<(), Box> { + use futures::stream; + + let empty_stream = stream::empty(); + let num_groups = 3; + + let result = partition_tasks(Box::pin(empty_stream), num_groups).await?; + + // Verify we have the expected number of groups + assert_eq!(result.len(), num_groups); + + // Verify start and end indices for empty groups + for (group_idx, group) in result.iter().enumerate() { + assert_eq!( + group.start_index, 0, + "Empty group {} should have start_index 0, got {}", + group_idx, group.start_index + ); + + assert_eq!( + group.end_index, 0, + "Empty group {} should have end_index 0, got {}", + group_idx, group.end_index + ); + + assert!( + group.tasks.is_empty(), + "Empty group {} should have no tasks", + group_idx + ); + } + + Ok(()) + } + + #[tokio::test] + async fn test_partition_tasks_validates_start_end_indices_varying_groups() + -> Result<(), Box> { + use futures::stream; + + // Test Case 1: Single group with multiple files + let single_group_tasks = vec![ + 
create_test_file_scan_task_with_record_count(1000, "/test/file1.parquet"), + create_test_file_scan_task_with_record_count(500, "/test/file2.parquet"), + create_test_file_scan_task_with_record_count(750, "/test/file3.parquet"), + ]; + let single_stream = stream::iter(single_group_tasks.into_iter().map(Ok)); + let single_result = partition_tasks(Box::pin(single_stream), 1).await?; + + assert_eq!(single_result.len(), 1); + assert_eq!(single_result[0].tasks.len(), 3); + + // Calculate total records and verify indices + let total_records: usize = single_result[0] + .tasks + .iter() + .map(|t| t.record_count.unwrap_or(0) as usize) + .sum(); + assert_eq!(total_records, 2250); // 1000 + 500 + 750 + assert_eq!(single_result[0].start_index, 0); + assert_eq!(single_result[0].end_index, 2249); // 2250 - 1 + + // Test Case 2: Two groups + let two_group_tasks = vec![ + create_test_file_scan_task_with_record_count(2000, "/test/file1.parquet"), + create_test_file_scan_task_with_record_count(1500, "/test/file2.parquet"), + create_test_file_scan_task_with_record_count(1000, "/test/file3.parquet"), + create_test_file_scan_task_with_record_count(500, "/test/file4.parquet"), + ]; + let two_stream = stream::iter(two_group_tasks.into_iter().map(Ok)); + let two_result = partition_tasks(Box::pin(two_stream), 2).await?; + + assert_eq!(two_result.len(), 2); + + // Calculate records per group and verify distribution + let mut group_records = vec![0; 2]; + for (group_idx, group) in two_result.iter().enumerate() { + for task in &group.tasks { + if let Some(count) = task.record_count { + group_records[group_idx] += count as usize; + } + } + } + + let total_records: usize = group_records.iter().sum(); + assert_eq!(total_records, 5000); // 2000 + 1500 + 1000 + 500 + + // Verify start and end indices are calculated correctly + let mut expected_start = 0; + for (group_idx, group) in two_result.iter().enumerate() { + assert_eq!( + group.start_index, expected_start, + "Group {} start_index should be 
{}, got {}", + group_idx, expected_start, group.start_index + ); + + if group_records[group_idx] > 0 { + let expected_end = expected_start + group_records[group_idx] - 1; + assert_eq!( + group.end_index, expected_end, + "Group {} end_index should be {}, got {}", + group_idx, expected_end, group.end_index + ); + + let range_size = group.end_index - group.start_index + 1; + assert_eq!( + range_size, group_records[group_idx], + "Group {} range size ({}) should match record count ({})", + group_idx, range_size, group_records[group_idx] + ); + } + + expected_start += group_records[group_idx]; + } + + Ok(()) + } +} diff --git a/pkg/data_cache/src/head/writer.rs b/pkg/data_cache/src/head/writer.rs new file mode 100644 index 0000000000..bcf0f7e14e --- /dev/null +++ b/pkg/data_cache/src/head/writer.rs @@ -0,0 +1,580 @@ +use super::config::config::CacheConfig; +use arrow::array::UInt64Array; +use arrow::record_batch::RecordBatch; +use arrow_flight::FlightClient; +use arrow_flight::encode::FlightDataEncoderBuilder; +use arrow_schema::{DataType, SchemaRef}; +use datafusion::common::exec_err; +use datafusion::error::{DataFusionError, Result}; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::{EquivalenceProperties, Partitioning}; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, EmptyRecordBatchStream, ExecutionPlan, PlanProperties, +}; +use futures::{StreamExt, TryStreamExt}; +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use std::time::Duration; +use tokio::time::{Instant, sleep}; +use tracing::{error, info, warn}; + +/// Execution plan for distributed writing to worker nodes via Apache Arrow Flight. 
+/// +/// This execution plan coordinates the distribution of RecordBatches to worker +/// nodes based on worker assignments contained in the data. It reads worker +/// assignments from input batches and sends data to the appropriate worker +/// nodes using Arrow Flight protocol. +/// +/// # Architecture +/// +/// The distributed writer operates in a head-worker architecture: +/// - Head node executes this plan to coordinate data distribution +/// - Worker nodes receive data via Arrow Flight `do_put` operations +/// - Worker assignments are embedded in the input data as `worker_ids` column +/// - Each batch is sent to the worker specified in its `worker_ids` column +/// +/// # Algorithm +/// +/// 1. Execute the input execution plan to get RecordBatches +/// 2. For each batch: +/// - Extract worker ID from the `worker_ids` column +/// - Look up worker address in the worker map +/// - Establish Arrow Flight connection to worker +/// - Send batch using `do_put` operation +/// 3. Return empty stream after all batches are distributed +/// +/// # Data Flow +/// +/// ```text +/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +/// │ Input Plan │───▶│ Distributed │───▶│ Worker Node 1 │ +/// │ (worker tasks) │ │ WriterExec │ │ (via Flight) │ +/// └─────────────────┘ └─────────────────┘ └─────────────────┘ +/// │ +/// ├──────────────────────▶│ Worker Node 2 │ +/// │ │ (via Flight) │ +/// │ └─────────────────┘ +/// │ +/// └──────────────────────▶│ Worker Node N │ +/// │ (via Flight) │ +/// └─────────────────┘ +/// ``` +/// +/// # Worker Assignment Format +/// +/// Input batches must contain a `worker_ids` column with: +/// - Data type: UInt64 +/// - Contains worker node identifier for each batch +/// - Used to lookup worker address in the worker map +/// +/// # Error Handling +/// +/// The execution plan handles various error conditions: +/// - Network failures during Flight connections +/// - Missing worker IDs in input batches +/// - Worker nodes not found in 
worker map +/// - Flight protocol errors during data transmission +/// +/// # Performance Considerations +/// +/// - Establishes new Flight connections for each batch (stateless) +/// - Uses configurable timeouts for connection and data transfer +/// - Processes batches sequentially to avoid overwhelming workers +/// - Memory efficient: streams data without buffering entire dataset +/// +/// # See Also +/// +/// - [`ExecutorClient`]: Arrow Flight client for worker communication +/// - [`send_record_batch`]: Function that performs the actual data distribution +/// - [`DataFileTableExec`]: Typical input execution plan that provides worker assignments +#[derive(Debug, Clone)] +pub struct DistributedWriterExec { + input: Arc, + worker_map: Arc>, + metadata_schema: SchemaRef, + plan_properties: PlanProperties, + config: Arc, +} + +impl DisplayAs for DistributedWriterExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "DistributedWriterExec: workers={}", + self.worker_map.len() + ) + } + DisplayFormatType::TreeRender => { + write!(f, "workers={}", self.worker_map.len()) + } + } + } +} + +impl DistributedWriterExec { + pub fn new( + input: Arc, + worker_map: Arc>, + metadata_schema: SchemaRef, + num_partitions: usize, + config: Arc, + ) -> Self { + // TODO:// revisit plan_properties + let eq_properties = EquivalenceProperties::new_with_orderings(metadata_schema.clone(), &[]); + let plan_properties = PlanProperties::new( + eq_properties, // Equivalence Properties + Partitioning::UnknownPartitioning(num_partitions), // Output Partitioning + EmissionType::Both, + Boundedness::Bounded, // Execution Mode + ); + Self { + input, + worker_map, + metadata_schema, + plan_properties, + config, + } + } +} + +impl ExecutionPlan for DistributedWriterExec { + fn name(&self) -> &str { + "DistributedWriterExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + 
+ fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc<(dyn ExecutionPlan + 'static)>> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + info!("In partition: {_partition}"); + //let addr = self.worker_map.get(&_partition.to_string()).unwrap().clone(); + let stream = futures::stream::once(send_record_batch( + self.input.clone(), + _context, + _partition, + self.worker_map.clone(), + self.config.clone(), + )) + .try_flatten(); + Ok(Box::pin(RecordBatchStreamAdapter::new( + self.metadata_schema.clone(), + stream, + ))) + } +} + +pub async fn send_record_batch( + input: Arc, + _context: Arc, + _partition: usize, + worker_map: Arc>, + config: Arc, +) -> Result { + info!("Executing batch of partition: {_partition}"); + let mut stream = match input.execute(_partition, _context) { + Err(e) => { + let err = e.to_string(); + error!("Stopping execution: error executing input: {err}"); + return exec_err!("Error with executing input plan in distributedExec"); + } + Ok(stream) => stream, + }; + + while let Some(item) = stream.next().await { + match item { + Ok(rb) => { + info!("sending rb :{:?}", rb); + let worker_ids = rb.column_by_name("worker_ids").ok_or_else(|| { + DataFusionError::Execution("worker_ids column not found".to_string()) + })?; + let worker_id = if let DataType::UInt64 = worker_ids.data_type() { + let list_array = worker_ids + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Execution( + "Failed to downcast to UInt64Array".to_string(), + ) + })?; + let values = list_array.values(); + values.first().ok_or_else(|| { + DataFusionError::Execution("No worker ID found".to_string()) + })? 
+ } else { + return exec_err!("Expected UInt64 DataType"); + }; + info!("Sending batch of partition: {_partition}"); + let addr = worker_map.get(&worker_id.to_string()).ok_or_else(|| { + DataFusionError::Execution(format!( + "Worker {} not found in worker map", + worker_id + )) + })?; + + // Send batch with retry mechanism + match send_batch_with_retry(addr, rb.clone(), config.clone()).await { + Ok(_) => { + info!("Successfully sent batch to worker {}", worker_id); + } + Err(e) => { + // Change error level to warning for batch delivery failures + // This allows the head to keep running even if some batches fail + warn!( + "Failed to send batch to worker {} after all retries: {}", + worker_id, e + ); + // Continue processing other batches instead of failing entirely + continue; + } + } + } + Err(error) => { + let err = error.to_string(); + error!("Stopping execution: {err}"); + return exec_err!("error sending batch"); + } + } + } + Ok(Box::pin(EmptyRecordBatchStream::new(input.schema()))) +} + +/// Send a record batch to a worker with retry mechanism +/// +/// This function implements configurable retry intervals for robust +/// worker communication in distributed environments. 
+/// +/// # Arguments +/// +/// * `addr` - Worker address to connect to +/// * `record_batch` - The data batch to send +/// * `config` - Cache configuration with timeout settings +/// +/// # Retry Strategy +/// +/// - Maximum retries: 3 (configurable via ARROW_CACHE_MAX_RETRIES) +/// - Retry interval: 5 seconds (configurable via ARROW_CACHE_RETRY_INTERVAL_MS) +/// - Fixed interval with optional jitter +/// +/// # Error Handling +/// +/// - Transient errors (connection timeouts, network issues) trigger retries +/// - "Table already exists" errors are logged as INFO (worker already has data) +/// - Permanent errors (authentication, invalid endpoints) fail immediately +/// - All errors are logged with appropriate severity levels +async fn send_batch_with_retry( + addr: &String, + record_batch: RecordBatch, + config: Arc, +) -> Result<()> { + // Configurable retry parameters via environment variables + let max_retries: u32 = std::env::var("ARROW_CACHE_MAX_RETRIES") + .unwrap_or_else(|_| "3".to_string()) + .parse() + .unwrap_or(3); + + let retry_interval_ms: u64 = std::env::var("ARROW_CACHE_RETRY_INTERVAL_MS") + .unwrap_or_else(|_| "5000".to_string()) + .parse() + .unwrap_or(5000); + + const JITTER_PERCENT: f64 = 0.1; // Reduced jitter for more predictable timing + + let mut retry_count = 0; + let start_time = Instant::now(); + + loop { + info!( + "Attempting to send batch to {} (attempt {}/{})", + addr, + retry_count + 1, + max_retries + 1 + ); + + match ExecutorClient::try_new(addr, config.connect_timeout).await { + Ok(mut client) => { + info!("Successfully connected to worker at {}", addr); + + match client + .send_batch(record_batch.schema(), vec![Ok(record_batch.clone())]) + .await + { + Ok(_) => { + let elapsed = start_time.elapsed(); + info!( + "Successfully sent batch to {} in {:?} (attempt {})", + addr, + elapsed, + retry_count + 1 + ); + return Ok(()); + } + Err(send_error) => { + // Check if this is a permanent error that shouldn't be retried + if 
is_permanent_error(&send_error) { + // Check specifically for memtable already exists (success case) + if send_error + .to_string() + .to_lowercase() + .contains("memtable already exists") + { + // Worker already has data loaded, this is actually a success case + info!( + "Worker at {} already has data loaded (memtable exists)", + addr + ); + return Ok(()); + } else if send_error.to_string().to_lowercase().contains("table") + && send_error + .to_string() + .to_lowercase() + .contains("already exists") + { + // Generic table already exists, also a success case + info!("Worker at {} already has required table", addr); + return Ok(()); + } else { + // Other permanent errors should be reported + warn!("Detected permanent error, skipping retries: {}", send_error); + return Err(send_error); + } + } + + // For non-permanent errors, log as error + error!("Failed to send batch to {}: {}", addr, send_error); + + if retry_count < max_retries { + warn!( + "Send failed, will retry in {}ms (attempt {}/{})", + retry_interval_ms, + retry_count + 1, + max_retries + 1 + ); + } else { + let total_elapsed = start_time.elapsed(); + error!( + "Exhausted all {} retry attempts to send batch to {} after {:?}", + max_retries + 1, + addr, + total_elapsed + ); + return Err(DataFusionError::Execution(format!( + "Failed to send batch to {} after {} retries: {}", + addr, + max_retries + 1, + send_error + ))); + } + } + } + } + Err(connect_error) => { + error!("Failed to connect to worker at {}: {}", addr, connect_error); + + if retry_count < max_retries { + warn!( + "Connection failed, will retry in {}ms (attempt {}/{})", + retry_interval_ms, + retry_count + 1, + max_retries + 1 + ); + } else { + let total_elapsed = start_time.elapsed(); + error!( + "Exhausted all {} retry attempts to connect to {} after {:?}", + max_retries + 1, + addr, + total_elapsed + ); + return Err(DataFusionError::Execution(format!( + "Failed to connect to {} after {} retries: {}", + addr, + max_retries + 1, + 
connect_error + ))); + } + } + } + + if retry_count < max_retries { + // Apply small jitter to avoid thundering herd problem + let jitter = (fastrand::f64() - 0.5) * 2.0 * JITTER_PERCENT; + let jittered_delay = (retry_interval_ms as f64 * (1.0 + jitter)) as u64; + + info!( + "Waiting {}ms before retry {} of {}", + jittered_delay, + retry_count + 1, + max_retries + ); + sleep(Duration::from_millis(jittered_delay)).await; + + retry_count += 1; + } else { + break; + } + } + + let total_elapsed = start_time.elapsed(); + Err(DataFusionError::Execution(format!( + "Failed to send batch to {} after {} retries and {:?}", + addr, + max_retries + 1, + total_elapsed + ))) +} + +/// Determines if an error is permanent and should not be retried +/// +/// Permanent errors include: +/// - Table already exists (data already loaded) +/// - Schema mismatches +/// - Authentication failures +/// - Invalid configuration +/// +/// Transient errors that should be retried include: +/// - Network timeouts +/// - Connection refused (worker starting up) +/// - DNS resolution failures (temporary) +fn is_permanent_error(error: &DataFusionError) -> bool { + let error_message = error.to_string().to_lowercase(); + + // Check for permanent application errors + if error_message.contains("table") && error_message.contains("already exists") { + return true; + } + + if error_message.contains("memtable already exists") { + return true; + } + + // Schema-related errors are usually permanent + if error_message.contains("schema mismatch") + || error_message.contains("invalid schema") + || error_message.contains("column not found") + { + return true; + } + + // Authentication/authorization errors are permanent + if error_message.contains("unauthorized") + || error_message.contains("permission denied") + || error_message.contains("authentication failed") + || error_message.contains("invalidaccesskeyid") + || error_message.contains("accessdenied") + || error_message.contains("signaturemismatch") + || 
error_message.contains("tokenmismatch") + || error_message.contains("the aws access key id you provided does not exist") + || error_message.contains("aws access key id") + || error_message.contains("s3error") + { + return true; + } + + // Invalid endpoint configurations are permanent + if error_message.contains("invalid uri") || error_message.contains("malformed url") { + return true; + } + + // All other errors are considered transient and should be retried + // This includes: + // - Connection timeouts + // - DNS resolution failures + // - Connection refused (worker starting) + // - Network unreachable + false +} + +/// Arrow Flight client for communicating with worker nodes. +/// +/// This client provides a high-level interface for sending RecordBatches to +/// worker nodes using the Arrow Flight protocol. It handles connection +/// establishment, data encoding, and error handling. +/// +/// # Connection Management +/// +/// - Establishes connections with configurable timeouts +/// - Uses connection pooling for efficient resource usage +/// - Handles connection failures gracefully +/// - Supports both connection and request timeouts +/// +/// # Data Transfer +/// +/// - Encodes RecordBatches using Arrow Flight format +/// - Sends data via `do_put` operations +/// - Handles streaming large datasets efficiently +/// - Provides error handling for transmission failures +/// +/// # Performance Characteristics +/// +/// - Connection timeout: 20 seconds +/// - Request timeout: 60 seconds +/// - Streaming data transfer for memory efficiency +/// - Automatic retry on transient failures +/// +/// # See Also +/// +/// - [`DistributedWriterExec`]: The execution plan that uses this client +/// - [`send_record_batch`]: Function that coordinates batch distribution +pub struct ExecutorClient { + flight_client: FlightClient, +} + +impl ExecutorClient { + pub async fn try_new(addr: &String, connect_timeout: Duration) -> Result { + info!("Connecting to {}", addr); + let connection 
= tonic::transport::Endpoint::new(addr.clone()) + .map_err(|e| DataFusionError::Execution(format!("Failed to create endpoint: {}", e)))? + .connect_timeout(connect_timeout) + .timeout(Duration::from_secs(300)) // Increased timeout for large data transfers + .connect() + .await + .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; + let flight_client = FlightClient::new(connection); + info!("Connected to {}", addr); + + Ok(Self { flight_client }) + } + + pub async fn send_batch( + &mut self, + schema: SchemaRef, + record_batches: Vec>, + ) -> Result { + let flight_data_stream = FlightDataEncoderBuilder::new() + .build(futures::stream::iter(record_batches.into_iter())); + self.flight_client + .do_put(flight_data_stream) + .await + .map_err(|e| DataFusionError::Execution(format!("Error sending batch: {e:?}")))? + .try_collect::>() + .await + .map_err(|e| DataFusionError::Execution(format!("Error calling do_put: {}", e)))?; + Ok(Box::pin(EmptyRecordBatchStream::new(schema))) + } +} diff --git a/pkg/data_cache/src/lib.rs b/pkg/data_cache/src/lib.rs new file mode 100644 index 0000000000..b202b4d4ad --- /dev/null +++ b/pkg/data_cache/src/lib.rs @@ -0,0 +1,39 @@ +//! Distributed Arrow-based data caching system with dual schema architecture. +//! +//! # Dual Schema Architecture +//! +//! This caching system uses **two completely separate Arrow schemas** for different purposes: +//! +//! ## 1. Metadata Schema (Head Node Coordination) +//! - **Purpose**: Coordinate worker assignments and data distribution +//! - **Location**: Head node ([`head`] module) +//! - **Created by**: [`head::head_service::metadata_arrow_schema()`] +//! - **Contains**: `worker_ids`, `row_start_indexes`, `row_end_indexes`, `file_paths` +//! - **Format**: Arrow schema with coordination fields +//! - **Usage**: Flight protocol, task distribution, worker routing +//! +//! ## 2. Data Schema (Worker Data Processing) +//! - **Purpose**: Describe actual data structure being cached and queried +//! 
- **Location**: Worker nodes ([`worker`] module) +//! - **Created by**: Converting Iceberg schema to Arrow in [`worker::worker_datasource::WorkerDataSource`] +//! - **Contains**: Actual data columns (e.g., `id`, `user_id`, `event_type`, `timestamp`) + `cache_index` +//! - **Format**: Arrow schema converted from Iceberg table metadata +//! - **Usage**: Query execution, data processing, result generation +//! +//! # Architecture Benefits +//! +//! - **Separation of Concerns**: Coordination logic independent of data structure +//! - **Schema Evolution**: Data schemas can evolve without affecting coordination +//! - **Performance**: Lightweight metadata operations without full data schema overhead +//! - **Modularity**: Head nodes don't need to understand data semantics +//! - **Scalability**: Coordination scales independently of data complexity +//! +//! # Module Organization +//! +//! - [`config`]: Configuration management for Iceberg table coordinates +//! - [`head`]: Head node coordination using metadata schema +//! 
- [`worker`]: Worker node data processing using data schema + +pub mod config; +pub mod head; +pub mod worker; diff --git a/pkg/data_cache/src/worker/bin/main.rs b/pkg/data_cache/src/worker/bin/main.rs new file mode 100644 index 0000000000..d360020884 --- /dev/null +++ b/pkg/data_cache/src/worker/bin/main.rs @@ -0,0 +1,16 @@ +use tracing::info; +use tracing_subscriber; + +#[path = "../mod.rs"] +mod worker; + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt().init(); + + let args: Vec = std::env::args().collect(); + info!("Arguments passed to worker: {:?}", &args[1..]); + let host = args.get(1).ok_or("Missing host argument")?; + let port = args.get(2).ok_or("Missing port argument")?; + worker::worker_service::run(host, port).await +} diff --git a/pkg/data_cache/src/worker/indexable_mem_table.rs b/pkg/data_cache/src/worker/indexable_mem_table.rs new file mode 100644 index 0000000000..62d4e2b5bb --- /dev/null +++ b/pkg/data_cache/src/worker/indexable_mem_table.rs @@ -0,0 +1,419 @@ +use arrow::array::RecordBatch; +use arrow_schema::SchemaRef; +use async_trait::async_trait; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::common::{Constraints, DataFusionError, ScalarValue, exec_err, plan_err}; +use datafusion::datasource::TableType; +use datafusion::datasource::memory::MemorySourceConfig; +use datafusion::execution::SessionState; +use datafusion::logical_expr::{BinaryExpr, Expr, TableProviderFilterPushDown}; +use datafusion::physical_plan::ExecutionPlan; +use futures::StreamExt; +use std::any::Any; +use std::sync::Arc; +use tracing::{error, info}; + +#[derive(Debug)] +pub struct IndexableMemTable { + schema: SchemaRef, + pub(crate) batches: Vec, + pub(crate) indices: Vec, +} + +impl IndexableMemTable { + pub fn try_new( + schema: SchemaRef, + partitions: Vec>, + indices: Vec, + ) -> datafusion::common::Result { + for batches in partitions.iter().flatten() { + let batches_schema = batches.schema(); + if 
!schema.contains(&batches_schema) { + error!( + "mem table schema does not contain batches schema. \ + Target_schema: {schema:?}. Batches Schema: {batches_schema:?}" + ); + return plan_err!("Mismatch between schema and batches"); + } + } + + Ok(Self { + schema, + batches: partitions.into_iter().flatten().collect(), + indices, + }) + } + + pub async fn load( + t: Arc, + _output_partitions: Option, + _state: &SessionState, + start_index: u64, + ) -> datafusion::common::Result { + let schema = t.schema(); + let exec = t.scan(_state, None, &[], None).await?; + + let mut data: Vec = vec![]; + let mut indices: Vec = vec![]; + + let mut current_index = start_index; + info!("Starting execution stream from exec: {:?}", exec.name()); + let mut stream = exec.execute(0, _state.task_ctx())?; + info!("Execution stream created successfully"); + + // Collect all batches from the execution stream + // This will trigger the fallback logic in WorkerExec if Iceberg has no data + let mut batch_count = 0; + while let Some(batch_result) = stream.next().await { + batch_count += 1; + info!("Received batch #{} from stream", batch_count); + match batch_result { + Ok(batch) => { + let num_rows = batch.num_rows(); + info!("Batch #{} has {} rows", batch_count, num_rows); + if num_rows > 0 { + indices.push(current_index); + current_index += num_rows as u64; + data.push(batch); + info!( + "Loaded batch with {} rows, current_index now {}", + num_rows, current_index + ); + } else { + info!("Skipping empty batch"); + } + } + Err(e) => { + error!("Error reading batch during data loading: {}", e); + return Err(e); + } + } + } + info!("Stream completed after {} batches", batch_count); + + info!("Number of batches loaded: {}", data.len()); + if !data.is_empty() { + info!( + "Successfully loaded {} total rows from {} to {}", + current_index - start_index, + start_index, + current_index - 1 + ); + + // Use the actual schema from the loaded batches instead of the expected schema + let actual_schema = 
data[0].schema(); + info!("Using actual batch schema: {:?}", actual_schema); + IndexableMemTable::try_new(actual_schema, vec![data], indices) + } else { + error!( + "No batches loaded from data source - fallback logic should have been triggered but produced no data" + ); + IndexableMemTable::try_new(Arc::clone(&schema), vec![data], indices) + } + } +} + +async fn fetch_partitions( + batches: Vec, + indices: &[u64], + start: u64, + end: u64, +) -> Vec { + let mut left = 0; + let mut right = indices.len(); + + while left < right { + let mid = left + (right - left) / 2; + if indices[mid] < start { + left = mid + 1; + } else { + right = mid; + } + } + let start_index = left; + + right = indices.len(); + + while left < right { + let mid = left + (right - left) / 2; + if indices[mid] <= end { + left = mid + 1; + } else { + right = mid; + } + } + let end_index = right; + + if batches.is_empty() { + vec![] + } else if start_index < end_index { + batches[start_index..=end_index - 1].to_owned() + } else if start_index == end_index && end_index > 0 { + vec![batches[end_index - 1].to_owned()] + } else { + vec![] + } +} + +#[async_trait] +impl TableProvider for IndexableMemTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + fn constraints(&self) -> Option<&Constraints> { + None + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> datafusion::common::Result> { + let (start, end) = if _filters.len() == 1 { + info!("{:?}", _filters[0]); + let start = collect_literals(&_filters[0]).ok_or_else(|| { + DataFusionError::Execution( + "Failed to extract start value from first filter".to_string(), + ) + })?; + (start, start) + } else if _filters.len() == 2 { + info!("{:?}", _filters[0]); + info!("{:?}", _filters[1]); + let start = collect_literals(&_filters[0]).ok_or_else(|| { + 
DataFusionError::Execution( + "Failed to extract start value from first filter".to_string(), + ) + })?; + let end = collect_literals(&_filters[1]).ok_or_else(|| { + DataFusionError::Execution( + "Failed to extract end value from second filter".to_string(), + ) + })?; + (start, end) + } else { + return exec_err!("Incorrect filters"); + }; + let partitions = fetch_partitions(self.batches.clone(), &self.indices, start, end).await; + + let exec = + MemorySourceConfig::try_new_exec(&[partitions], self.schema(), projection.cloned())?; + + Ok(exec) + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> datafusion::common::Result> { + Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()]) + } +} + +fn collect_literals(expr: &Expr) -> Option { + match expr { + Expr::BinaryExpr(BinaryExpr { + left: _, + op: _, + right, + }) => { + if let Expr::Literal(scalar) = &**right { + if let ScalarValue::UInt64(Some(val)) = scalar { + return Some(*val); + } + } + None + } + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::UInt64Array; + use arrow::{ + array::StringArray, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }; + use datafusion::assert_batches_eq; + use datafusion::datasource::MemTable; + use datafusion::prelude::{SessionConfig, SessionContext}; + use std::sync::Arc; + + fn create_schema() -> Arc { + Arc::new(Schema::new(vec![ + Field::new("id", DataType::UInt64, false), + Field::new("name", DataType::Utf8, false), + ])) + } + // Create sample RecordBatches for testing + fn create_test_batches() -> Result, Box> { + let schema = create_schema(); + Ok(vec![ + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(UInt64Array::from(vec![0, 1, 2, 3])), + Arc::new(StringArray::from(vec!["A", "B", "C", "D"])), + ], + )?, + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(UInt64Array::from(vec![4, 5, 6, 7])), + Arc::new(StringArray::from(vec!["E", "F", "G", "H"])), + ], + )?, + ]) + } + 
+ // #[test] + // fn test_basic_and_condition_with_filter() { + // let expr = col("a").eq(lit(1u64)).and(col("b").eq(lit(2u64))); + // let df_schema = DFSchema::try_from(create_schema()).unwrap(); + // let physical_expr = SessionContext::new().create_physical_expr(expr, &df_schema).unwrap(); + // let a = collect_literals(&physical_expr).unwrap(); + // let b = collect_literals(&physical_expr[1]).unwrap(); + // assert_eq!(a, 1); + // assert_eq!(b, 2); + // } + + #[tokio::test] + async fn test_partition_selection() -> Result<(), Box> { + let batches = create_test_batches()?; + let result = fetch_partitions(batches, &[0, 4], 0, 2).await; + + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 0 | A |", + "| 1 | B |", + "| 2 | C |", + "| 3 | D |", + "+----+------+" + ], + &result + ); + Ok(()) + } + + #[tokio::test] + async fn test_partition_selection_single_row() -> Result<(), Box> { + let batches = create_test_batches()?; + let result = fetch_partitions(batches, &[0, 4], 2, 2).await; + + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 0 | A |", + "| 1 | B |", + "| 2 | C |", + "| 3 | D |", + "+----+------+" + ], + &result + ); + Ok(()) + } + + #[tokio::test] + async fn test_partition_selection_all() -> Result<(), Box> { + let batches = create_test_batches()?; + let result = fetch_partitions(batches, &[0, 4], 0, 4).await; + // println!("{}", &result.get(0).unwrap().num_rows()); + // println!("{}", &result.get(1).unwrap().num_rows()); + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 0 | A |", + "| 1 | B |", + "| 2 | C |", + "| 3 | D |", + "| 4 | E |", + "| 5 | F |", + "| 6 | G |", + "| 7 | H |", + "+----+------+" + ], + &result + ); + Ok(()) + } + + #[tokio::test] + async fn test_indexable_mem_table_with_scan() -> Result<(), Box> { + let schema = create_schema(); + let rb = create_test_batches()?; + let mem_table = IndexableMemTable::try_new(schema, vec![rb], 
[0, 4].to_vec())?; + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(mem_table))?; + let df = ctx + .sql("SELECT id, name FROM test_table where id >= 0 AND id <= 2") + .await?; + let result = df.collect().await?; + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 0 | A |", + "| 1 | B |", + "| 2 | C |", + "+----+------+" + ], + &result + ); + Ok(()) + } + + #[tokio::test] + async fn test_indexable_mem_table_with_load_and_scan() -> Result<(), Box> + { + let schema = create_schema(); + let rb = create_test_batches()?; + let config = SessionConfig::new().with_batch_size(1024); + let ctx = SessionContext::new_with_config(config); + let mem_table = MemTable::try_new(schema, vec![rb])?; + let indexable_mem_table = + IndexableMemTable::load(Arc::new(mem_table), None, &ctx.state(), 0).await?; + + ctx.register_table("test_table", Arc::new(indexable_mem_table))?; + let df = ctx + .sql("SELECT id, name FROM test_table where id >= 0 AND id <= 2") + .await?; + let result = df.collect().await?; + assert_batches_eq!( + [ + "+----+------+", + "| id | name |", + "+----+------+", + "| 0 | A |", + "| 1 | B |", + "| 2 | C |", + "+----+------+" + ], + &result + ); + Ok(()) + } +} diff --git a/pkg/data_cache/src/worker/mod.rs b/pkg/data_cache/src/worker/mod.rs new file mode 100644 index 0000000000..c910dc143b --- /dev/null +++ b/pkg/data_cache/src/worker/mod.rs @@ -0,0 +1,6 @@ +#[path = "../config/mod.rs"] +pub mod config; +mod indexable_mem_table; +pub mod worker; +pub mod worker_datasource; +pub mod worker_service; diff --git a/pkg/data_cache/src/worker/worker.rs b/pkg/data_cache/src/worker/worker.rs new file mode 100644 index 0000000000..72cfb8275a --- /dev/null +++ b/pkg/data_cache/src/worker/worker.rs @@ -0,0 +1,50 @@ +use crate::worker::indexable_mem_table::IndexableMemTable; +use crate::worker::worker_datasource::WorkerDataSource; +use datafusion::common::DataFusionError; +use datafusion::prelude::SessionContext; 
+use std::sync::Arc; +use tracing::{error, info}; + +pub struct DataLoader { + data_source: Arc, +} + +impl DataLoader { + pub(crate) async fn new( + metadata_loc: String, + table_name: String, + schema_name: String, + file_urls: Vec, + start_index: u64, + ) -> Result> { + let data_source = WorkerDataSource::new( + metadata_loc, + table_name, + schema_name, + file_urls, + start_index, + ) + .await?; + Ok(Self { + data_source: Arc::new(data_source), + }) + } + pub(crate) async fn load_data( + &self, + ctx: &SessionContext, + table: &str, + start_index: u64, + ) -> datafusion::common::Result<()> { + let memtable = + IndexableMemTable::load(self.data_source.clone(), None, &ctx.state(), start_index) + .await + .map_err(|error: DataFusionError| { + error!("Error loading table: {:?}", error); + error + })?; + ctx.register_table(table, Arc::new(memtable)) + .map_err(|e| DataFusionError::Execution(format!("Failed to register table: {}", e)))?; + info!("loaded and registered table"); + Ok(()) + } +} diff --git a/pkg/data_cache/src/worker/worker_datasource.rs b/pkg/data_cache/src/worker/worker_datasource.rs new file mode 100644 index 0000000000..ea6f44cb1f --- /dev/null +++ b/pkg/data_cache/src/worker/worker_datasource.rs @@ -0,0 +1,798 @@ +use arrow::array::UInt64Array; +use arrow::record_batch::RecordBatch; +use arrow_schema::{DataType, Field, Schema, SchemaBuilder, SchemaRef}; +use async_trait::async_trait; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::datasource::TableType; +use datafusion::error::Result; +use datafusion::execution::{SendableRecordBatchStream, TaskContext}; +use datafusion::logical_expr::Expr; +use datafusion::physical_expr::EquivalenceProperties; +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, RecordBatchStream, +}; +use futures::{Stream, 
StreamExt, TryStreamExt, future}; +use iceberg::TableIdent; +use iceberg::arrow::schema_to_arrow_schema; +use iceberg::io::FileIO; +use iceberg::scan::{FileScanTask, FileScanTaskStream}; +use iceberg::table::{StaticTable, Table}; +use iceberg_datafusion::{from_datafusion_error, to_datafusion_error}; +use object_store::aws::AmazonS3Builder; +use std::any::Any; +use std::fmt::{Debug, Formatter}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use tracing::{error, info}; +use url::Url; + +/// Worker node data source that manages **data schemas** for distributed Arrow caching. +/// +/// **IMPORTANT**: This component handles the **data schema** (actual data structure) +/// which is completely separate from the **metadata schema** used by the head node +/// for coordination. See [`metadata_arrow_schema()`] for head node coordination schema. +/// +/// # Dual Schema Architecture - Worker Data Processing +/// +/// This worker data source manages **two distinct Arrow schemas** for data processing: +/// +/// 1. **`table_schema`** - Original data schema from Iceberg: +/// - Source: Iceberg table metadata converted to Arrow format +/// - Conversion: `iceberg::arrow::schema_to_arrow_schema(table.metadata().current_schema())` +/// - Purpose: Represents the raw data structure for reading files +/// - Example: `[id: Int64, user_id: String, event_type: String, timestamp: Timestamp]` +/// +/// 2. 
**`output_schema`** - Enhanced data schema for caching: +/// - Source: `table_schema` + additional `cache_index` column +/// - Purpose: Provides global row ordering across distributed workers +/// - Example: `[id: Int64, user_id: String, event_type: String, timestamp: Timestamp, cache_index: UInt64]` +/// +/// # Schema vs Metadata Schema Separation +/// +/// **Worker Data Schemas** (this component): +/// - Describe actual data structure (columns, types, semantics) +/// - Converted from Iceberg metadata to Arrow format +/// - Enhanced with caching columns for efficient lookups +/// - Used for query execution and data processing +/// +/// **Head Node Metadata Schema** (coordination only): +/// - Describes worker coordination (worker_ids, row ranges, file paths) +/// - Created by [`metadata_arrow_schema()`] function +/// - No relationship to actual data structure +/// - Used for distributed query planning and coordination +/// +/// # Architecture +/// +/// The worker data source operates as part of a head-worker architecture: +/// - Head node assigns specific file URLs using metadata schema +/// - Worker loads assigned data files using data schemas (this component) +/// - Adds a `cache_index` column for global row ordering +/// - Supports streaming data processing with bounded memory usage +/// +/// # Data Schema Conversion Flow +/// +/// ```text +/// Iceberg Table Metadata +/// │ (contains original data schema) +/// ▼ +/// iceberg::arrow::schema_to_arrow_schema() +/// │ +/// ▼ +/// table_schema: SchemaRef +/// │ (e.g., [id, user_id, event_type, timestamp]) +/// ▼ +/// + cache_index column +/// │ +/// ▼ +/// output_schema: SchemaRef +/// │ (e.g., [id, user_id, event_type, timestamp, cache_index]) +/// ▼ +/// Used for query execution +/// ``` +/// +/// # Data Flow with Schema Usage +/// +/// ```text +/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +/// │ Head Node │───▶│ WorkerDataSource│───▶│ IndexColumn │ +/// │(metadata schema)│ │ (data schemas) │ │ 
Exec │ +/// └─────────────────┘ └─────────────────┘ └─────────────────┘ +/// │ │ +/// ▼ ▼ +/// ┌─────────────────┐ ┌─────────────────┐ +/// │ WorkerExec │ │ Row Numbering │ +/// │(table_schema) │ │(output_schema) │ +/// └─────────────────┘ └─────────────────┘ +/// ``` +/// +/// # Example Data Schema Evolution +/// +/// **Original Iceberg Schema**: +/// ```text +/// ┌────────┬─────────┬────────────┬─────────────┐ +/// │ id │ user_id │ event_type │ timestamp │ +/// │ Int64 │ String │ String │ Timestamp │ +/// └────────┴─────────┴────────────┴─────────────┘ +/// ``` +/// +/// **table_schema** (converted to Arrow): +/// ```text +/// ┌────────┬─────────┬────────────┬─────────────┐ +/// │ id │ user_id │ event_type │ timestamp │ +/// │ Int64 │ String │ String │ Timestamp │ +/// └────────┴─────────┴────────────┴─────────────┘ +/// ``` +/// +/// **output_schema** (enhanced for caching): +/// ```text +/// ┌────────┬─────────┬────────────┬─────────────┬─────────────┐ +/// │ id │ user_id │ event_type │ timestamp │ cache_index │ +/// │ Int64 │ String │ String │ Timestamp │ UInt64 │ +/// └────────┴─────────┴────────────┴─────────────┴─────────────┘ +/// ``` +/// +/// # Performance Considerations +/// +/// - Only loads files assigned to this worker (reduces I/O) +/// - Streams data to minimize memory footprint +/// - `cache_index` enables efficient range-based queries +/// - Uses Iceberg's native file filtering capabilities +/// - Schema conversion happens once during initialization +/// +/// # See Also +/// +/// ## Data Schema Components: +/// - [`WorkerExec`]: Execution plan for loading data using table_schema +/// - [`IndexColumnExec`]: Execution plan for adding cache_index using output_schema +/// - [`RowNumberStream`]: Stream processor for row numbering with output_schema +/// - [`iceberg::arrow::schema_to_arrow_schema`]: Converts Iceberg schema to Arrow +/// +/// ## Metadata Schema (Head Node Coordination): +/// - [`metadata_arrow_schema()`]: Creates coordination schema 
(separate from data) +/// - [`HeadService`]: Uses metadata schema for worker coordination +pub struct WorkerDataSource { + file_urls: Vec, + start_index: u64, + inner: Table, + /// **Enhanced data schema** for query execution and caching operations. + /// + /// This schema includes: + /// - All original data columns from the Iceberg table + /// - Additional `cache_index` column (UInt64) for efficient indexing + /// + /// Used by query execution plans and result generation. + /// Example: `[id: Int64, user_id: String, event_type: String, timestamp: Timestamp, cache_index: UInt64]` + output_schema: SchemaRef, + /// **Original data schema** converted from Iceberg table metadata to Arrow format. + /// + /// This represents the raw data structure as defined in the Iceberg table, + /// without any caching enhancements. Used for reading and processing data files. + /// + /// Converted using: `iceberg::arrow::schema_to_arrow_schema(table.metadata().current_schema())` + /// Example: `[id: Int64, user_id: String, event_type: String, timestamp: Timestamp]` + table_schema: SchemaRef, +} + +impl WorkerDataSource { + pub(crate) async fn new( + metadata_loc: String, + table_name: String, + schema_name: String, + file_urls: Vec, + start_index: u64, + ) -> Result> { + let file_io = FileIO::from_path(&metadata_loc) + .map_err(|e| format!("Failed to create FileIO: {}", e))? 
+ .build() + .map_err(|e| format!("Failed to build FileIO: {}", e))?; + let table_indent = TableIdent::from_strs([schema_name, table_name]) + .map_err(|e| format!("Failed to create table ident: {}", e))?; + let static_table = + StaticTable::from_metadata_file(&metadata_loc, table_indent, file_io.clone()) + .await + .map_err(|e| format!("Failed to load static table: {}", e))?; + let table = static_table.into_table(); + + // STEP 1: Convert Iceberg data schema to Arrow format + // This creates the table_schema containing the original data columns + let schema = Arc::new( + schema_to_arrow_schema(table.metadata().current_schema()) + .map_err(|e| format!("Failed to convert schema: {}", e))?, + ); + + // STEP 2: Create enhanced schema by adding cache_index column + // This creates the output_schema used for query execution and caching + let fields = schema.fields().clone(); + let mut builder = SchemaBuilder::from(&fields); + builder.push(Field::new("cache_index", DataType::UInt64, false)); // TODO:// validate name collision + let output_schema = Arc::new(Schema::new(builder.finish().fields)); + Ok(Self { + file_urls, + start_index, + inner: table, + output_schema, + table_schema: schema, + }) + } +} + +impl Debug for WorkerDataSource { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +#[async_trait] +impl TableProvider for WorkerDataSource { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.output_schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + _projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> datafusion::error::Result> { + info!("creating exec to fetch data from iceberg table"); + let iceberg_exec = WorkerExec::new( + self.file_urls.clone(), + self.inner.clone(), + self.table_schema.clone(), + ); + Ok(Arc::new(IndexColumnExec::new( + Arc::new(iceberg_exec), + self.output_schema.clone(), + 
self.start_index, + ))) + + //TODO: Support scanning with subset of datafiles + //TODO: Support projection with selected columns - Exec init computes projected schema and add its to props. Converts to column names and passes it to exec + } +} + +/// Execution plan for loading assigned data files on worker nodes. +/// +/// This execution plan reads specific data files assigned by the head node from +/// an Iceberg table. It filters the table's file scan tasks to only process files +/// that have been assigned to this worker node. +/// +/// # Algorithm +/// +/// 1. Build an Iceberg table reader from metadata +/// 2. Plan all available files from the table +/// 3. Filter file scan tasks to only include assigned file URLs +/// 4. Stream data from the filtered files +/// +/// # File Filtering +/// +/// The execution plan only processes files that match the assigned file URLs: +/// - Reduces I/O by avoiding unnecessary file reads +/// - Maintains data locality and distribution as planned by head node +/// - Uses Iceberg's built-in file filtering capabilities +/// +/// # Performance Considerations +/// +/// - Single partition output (worker processes assigned files sequentially) +/// - Streaming execution to minimize memory usage +/// - Respects data file concurrency limits for controlled resource usage +/// - Filters at the file level before reading data +/// +/// # See Also +/// +/// - [`WorkerDataSource`]: The table provider that creates this execution plan +/// - [`IndexColumnExec`]: Wraps this plan to add cache indexing +/// - [`read_stream`]: Async function that performs the actual file reading +pub struct WorkerExec { + file_urls: Vec, + inner: Table, + schema: SchemaRef, + plan_properties: PlanProperties, +} + +impl WorkerExec { + fn new(file_urls: Vec, inner: Table, schema: SchemaRef) -> Self { + let eq_properties = EquivalenceProperties::new_with_orderings(schema.clone(), &[]); + let plan_properties = PlanProperties::new( + eq_properties, // Equivalence 
Properties + datafusion::physical_expr::Partitioning::UnknownPartitioning(1), // Output Partitioning + EmissionType::Both, + Boundedness::Bounded, // Execution Mode + ); + Self { + file_urls, + inner, + schema, + plan_properties, + } + } +} + +impl Debug for WorkerExec { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl DisplayAs for WorkerExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "WorkerExec: files={}", self.file_urls.len()) + } + DisplayFormatType::TreeRender => { + write!(f, "files={}", self.file_urls.len()) + } + } + } +} + +impl ExecutionPlan for WorkerExec { + fn name(&self) -> &str { + "WorkerExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc<(dyn ExecutionPlan + 'static)>> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + info!( + "WorkerExec::execute called with file_urls: {:?}", + self.file_urls + ); + let stream = futures::stream::once(read_stream(self.inner.clone(), self.file_urls.clone())) + .try_flatten(); + info!("WorkerExec::execute created stream, returning RecordBatchStreamAdapter"); + Ok(Box::pin(RecordBatchStreamAdapter::new( + self.schema.clone(), + stream, + ))) + } +} + +/// Execution plan that adds a cache index column to incoming data streams. +/// +/// This execution plan wraps another execution plan and adds a monotonically +/// increasing `cache_index` column to each RecordBatch. The index provides +/// global row ordering across all workers in the distributed system. +/// +/// # Algorithm +/// +/// 1. Execute the wrapped input execution plan +/// 2. 
For each incoming RecordBatch: +/// - Add a `cache_index` column with sequential row numbers +/// - Start numbering from the provided `start_index` +/// - Increment the counter for subsequent batches +/// +/// # Index Calculation +/// +/// The cache index is calculated as: +/// - First row in first batch: `start_index` +/// - Last row in first batch: `start_index + batch_size - 1` +/// - First row in second batch: `start_index + batch_size` +/// - And so on... +/// +/// # Schema Modification +/// +/// The output schema includes all original columns plus: +/// - `cache_index`: UInt64 column with globally unique row identifiers +/// - Column is appended to the end of the schema +/// - Non-nullable (every row gets an index) +/// +/// # Performance Considerations +/// +/// - Minimal overhead: only adds one column per batch +/// - Streaming execution: processes batches as they arrive +/// - Memory efficient: doesn't buffer entire dataset +/// - Preserves original data ordering and partitioning +/// +/// # See Also +/// +/// - [`WorkerDataSource`]: Creates this execution plan +/// - [`RowNumberStream`]: The stream that performs the index addition +/// - [`WorkerExec`]: The typical input execution plan +pub struct IndexColumnExec { + input: Arc, + schema: SchemaRef, + plan_properties: PlanProperties, + start_index: u64, +} + +impl IndexColumnExec { + fn new(input: Arc, schema: SchemaRef, start_index: u64) -> Self { + let eq_properties = EquivalenceProperties::new_with_orderings(schema.clone(), &[]); + let plan_properties = PlanProperties::new( + eq_properties, // Equivalence Properties + datafusion::physical_expr::Partitioning::UnknownPartitioning(1), // Output Partitioning + EmissionType::Both, + Boundedness::Bounded, // Execution Mode + ); + Self { + input, + schema, + plan_properties, + start_index, + } + } +} + +impl Debug for IndexColumnExec { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl DisplayAs for IndexColumnExec { + fn 
fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "IndexColumnExec: start_index={}", self.start_index) + } + DisplayFormatType::TreeRender => { + write!(f, "start_index={}", self.start_index) + } + } + } +} + +impl ExecutionPlan for IndexColumnExec { + fn name(&self) -> &str { + "IndexColumnExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.plan_properties + } + + fn children(&self) -> Vec<&Arc<(dyn ExecutionPlan + 'static)>> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + let stream = self.input.execute(_partition, _context)?; + Ok(Box::pin(RowNumberStream::new( + stream, + self.schema.clone(), + self.start_index, + ))) + } +} + +/// Stream processor that adds cache index column to RecordBatches. +/// +/// This stream wraps another RecordBatchStream and adds a monotonically +/// increasing `cache_index` column to each batch. The index provides global +/// row ordering across the distributed system. 
+/// +/// # Row Numbering +/// +/// Each row receives a unique index based on: +/// - Starting index provided during construction +/// - Sequential numbering across all batches +/// - Continuous numbering (no gaps between batches) +/// +/// # Performance Characteristics +/// +/// - Streaming: processes one batch at a time +/// - Low memory overhead: only stores current row counter +/// - Preserves batch boundaries and ordering +/// - Efficient array construction using range operations +/// +/// # See Also +/// +/// - [`IndexColumnExec`]: The execution plan that creates this stream +/// - [`RecordBatchStream`]: The trait this stream implements +pub struct RowNumberStream { + inner: SendableRecordBatchStream, + row_count: u64, + schema: SchemaRef, +} + +impl RowNumberStream { + pub fn new(inner: SendableRecordBatchStream, schema: SchemaRef, start_index: u64) -> Self { + RowNumberStream { + inner, + row_count: start_index, + schema, + } + } +} + +impl RecordBatchStream for RowNumberStream { + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } +} + +impl Stream for RowNumberStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.inner.poll_next_unpin(cx) { + Poll::Ready(Some(Ok(batch))) => { + let num_rows = batch.num_rows(); + + // Debug the schemas for diagnosis + let input_schema = batch.schema(); + let expected_schema = self.schema.clone(); + info!( + "RowNumberStream: Input batch schema: {:?} with {} columns", + input_schema, + input_schema.fields().len() + ); + info!( + "RowNumberStream: Expected output schema: {:?} with {} columns", + expected_schema, + expected_schema.fields().len() + ); + + // If the schemas don't match (except for the cache_index we'll add), create a new schema + // that combines the input columns with the cache_index column + let actual_schema = + if input_schema.fields().len() + 1 != expected_schema.fields().len() { + let mut builder = 
arrow_schema::SchemaBuilder::from(input_schema.fields()); + builder.push(Field::new("cache_index", DataType::UInt64, false)); + let new_schema = Arc::new(Schema::new(builder.finish().fields)); + info!( + "RowNumberStream: Created new compatible schema: {:?}", + new_schema + ); + new_schema + } else { + expected_schema + }; + + let mut new_columns = batch.columns().to_vec(); + + let row_numbers: UInt64Array = (self.row_count..self.row_count + num_rows as u64) + .collect::>() + .into(); + + new_columns.push(Arc::new(row_numbers)); + + // Use the compatible schema to create the new batch + let new_batch = RecordBatch::try_new(actual_schema, new_columns)?; + self.row_count += num_rows as u64; + + Poll::Ready(Some(Ok(new_batch))) + } + other => other, + } + } +} + +async fn read_stream( + table: Table, + file_urls: Vec, +) -> Result> + Send>>> { + info!("read_stream: Starting with file_urls: {:?}", file_urls); + let reader = table.reader_builder().build(); + + // Plan all files from Iceberg + info!("read_stream: Building Iceberg scan..."); + let mut planned = table + .scan() + .with_data_file_concurrency_limit(1) + .build() + .map_err(to_datafusion_error)? 
+ .plan_files() + .await + .map_err(to_datafusion_error)?; + + // Collect and filter matching tasks against assigned file_urls + let file_urls_arc = Arc::new(file_urls.clone()); + let mut total_planned = 0usize; + let mut matched: Vec = Vec::new(); + info!("read_stream: Processing planned files..."); + while let Some(next) = planned.next().await { + match next { + Ok(task) => { + total_planned += 1; + info!("read_stream: Found file task: {}", task.data_file_path); + if file_urls_arc.contains(&task.data_file_path) { + info!("read_stream: File matches assigned files, adding to matched list"); + matched.push(task); + } else { + info!("read_stream: File does not match assigned files"); + } + } + Err(e) => { + error!("read_stream: Error in planning files: {}", e); + return Err(to_datafusion_error(e)); + } + } + } + + info!( + "read_stream: Iceberg scan completed - total_planned_files={}, matched_files={}", + total_planned, + matched.len() + ); + + // If Iceberg has no files or none matched, fallback to direct parquet reading + if total_planned == 0 || matched.is_empty() { + info!( + "read_stream: Falling back to direct Parquet reading (total_planned={}, matched={})", + total_planned, + matched.len() + ); + return read_parquet_files_directly((*file_urls_arc).clone()).await; + } + + // Build a stream from matched tasks and let Iceberg reader read them + info!( + "read_stream: Building Iceberg reader stream with {} matched files", + matched.len() + ); + let matched_stream = futures::stream::iter(matched.into_iter().map(Ok)); + let stream = reader + .read(Box::pin(matched_stream)) + .await + .map_err(to_datafusion_error)? 
+        .map_err(to_datafusion_error);
+    info!("read_stream: Iceberg reader stream created successfully");
+    Ok(Box::pin(stream))
+}
+
+/// Filters a planned file-scan task stream down to the tasks whose `data_file_path`
+/// is contained in `file_urls`.
+///
+/// If `result` is an error, a one-item stream carrying the converted error is returned
+/// instead. Currently unused (hence `dead_code`).
+// NOTE(review): generic arguments appear to have been stripped from this listing
+// (`Result,`, `Arc>,`, `Result> + Send>>>`); signatures are left exactly as found.
+#[allow(dead_code)]
+async fn filter_and_create_stream(
+    result: Result,
+    file_urls: Arc>,
+) -> Result> + Send>>> {
+    match result {
+        Ok(stream) => {
+            info!("File URLs to match: {:?}", file_urls);
+            Ok(Box::pin(
+                stream
+                    .try_filter(move |task| {
+                        info!(
+                            "Checking file task path: '{}' against URLs: {:?}",
+                            task.data_file_path, file_urls
+                        );
+                        let matches = file_urls.clone().contains(&task.data_file_path);
+                        info!("File '{}' matches: {}", task.data_file_path, matches);
+                        future::ready(matches)
+                    })
+                    .map(|result| result),
+            ))
+        }
+        Err(e) => Ok(Box::pin(futures::stream::once(future::ready(Err(
+            from_datafusion_error(e),
+        ))))),
+    }
+}
+
+/// Reads the given Parquet files with DataFusion and returns their batches as a stream.
+///
+/// # Behavior
+///
+/// 1. If any URL starts with `s3://` and `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`
+///    are set, an S3 object store is registered for every distinct bucket. The region
+///    comes from `AWS_REGION` (default `us-east-1`); `AWS_SESSION_TOKEN` is passed
+///    along when present so that temporary (assumed-role) credentials work.
+/// 2. Every file is read and fully collected, in the order given.
+/// 3. The collected batches are returned as an in-memory stream.
+///
+/// Note that all batches are held in memory before the stream is returned.
+///
+/// # Errors
+///
+/// Returns an error if an object store or bucket URL cannot be built, or if reading or
+/// collecting any file fails.
+async fn read_parquet_files_directly(
+    file_urls: Vec,
+) -> Result> + Send>>> {
+    info!("Reading Parquet files directly: {:?}", file_urls);
+
+    use datafusion::prelude::SessionContext;
+    use std::env;
+
+    // Create a DataFusion session context
+    let ctx = SessionContext::new();
+
+    // Configure S3 object stores if any assigned URL points at S3 (not just the first one)
+    if file_urls.iter().any(|url| url.starts_with("s3://")) {
+        let aws_access_key = env::var("AWS_ACCESS_KEY_ID").unwrap_or_default();
+        let aws_secret_key = env::var("AWS_SECRET_ACCESS_KEY").unwrap_or_default();
+        // Temporary STS credentials are only valid together with their session token.
+        let aws_session_token = env::var("AWS_SESSION_TOKEN")
+            .ok()
+            .filter(|token| !token.is_empty());
+        let aws_region = env::var("AWS_REGION").unwrap_or_else(|_| "us-east-1".to_string());
+
+        if !aws_access_key.is_empty() && !aws_secret_key.is_empty() {
+            // Extract unique bucket names from all S3 URLs
+            let mut buckets = std::collections::HashSet::new();
+            for file_url in &file_urls {
+                if let Some(bucket) = file_url
+                    .strip_prefix("s3://")
+                    .and_then(|path| path.split('/').next())
+                {
+                    buckets.insert(bucket);
+                }
+            }
+
+            // Register an object store for each unique bucket
+            for bucket in buckets {
+                let mut builder = AmazonS3Builder::new()
+                    .with_access_key_id(&aws_access_key)
+                    .with_secret_access_key(&aws_secret_key)
+                    .with_region(&aws_region)
+                    .with_bucket_name(bucket);
+                if let Some(token) = &aws_session_token {
+                    builder = builder.with_token(token.as_str());
+                }
+                let s3_store = builder
+                    .build()
+                    .map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
+
+                let bucket_url = Url::parse(&format!("s3://{}/", bucket))
+                    .map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
+                ctx.runtime_env()
+                    .register_object_store(&bucket_url, Arc::new(s3_store));
+                info!("Registered S3 object store for bucket: {}", bucket);
+            }
+        } else {
+            info!("AWS credentials not available, S3 access may fail");
+        }
+    }
+
+    // Create a stream of record batches from all files
+    let mut all_batches = Vec::new();
+
+    for file_url in file_urls {
+        info!("Reading Parquet file: {}", file_url);
+
+        // Read the Parquet file using DataFusion
+        let df = ctx
+            .read_parquet(&file_url, Default::default())
+            .await
+            .map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
+
+        let batches = df.collect().await?;
+        let batches_len = batches.len();
+
+        // Log schema information for debugging
+        if !batches.is_empty() {
+            let batch_schema = batches[0].schema();
+            info!("Parquet file {} schema: {:?}", file_url, batch_schema);
+            info!(
+                "Parquet file {} has {} columns",
+                file_url,
+                batch_schema.fields().len()
+            );
+        }
+
+        all_batches.extend(batches);
+
+        info!("Loaded {} batches from {}", batches_len, file_url);
+    }
+
+    info!("Total batches loaded: {}", all_batches.len());
+
+    // Create a stream from the collected batches
+    let stream = futures::stream::iter(all_batches.into_iter().map(Ok));
+    Ok(Box::pin(stream))
+}
diff --git a/pkg/data_cache/src/worker/worker_service.rs b/pkg/data_cache/src/worker/worker_service.rs
new file mode 100644
index 0000000000..fbd3e4ea58
--- /dev/null
+++ b/pkg/data_cache/src/worker/worker_service.rs
@@ -0,0 +1,384 @@
+use super::config::config::DatasetConfig;
+use crate::worker::worker::DataLoader;
+use arrow::array::{ListArray, StringViewArray, UInt64Array};
+use arrow_flight::decode::FlightRecordBatchStream;
+use arrow_flight::encode::FlightDataEncoderBuilder;
+use
arrow_flight::error::FlightError; +use arrow_flight::flight_service_server::FlightServiceServer; +use arrow_flight::{ + Action, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, + HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket, + flight_service_server::FlightService, +}; +use arrow_schema::DataType; +use bytes::Bytes; +use datafusion::prelude::{SessionConfig, SessionContext}; +use futures::{Stream, StreamExt, TryStreamExt}; +use serde::{Deserialize, Serialize}; +use std::pin::Pin; +use std::sync::Arc; +use tonic::{Request, Response, Status, Streaming}; +use tracing::info; + +/// Worker node service implementing Apache Arrow Flight protocol for distributed caching. +/// +/// This service provides the worker node functionality in the distributed Arrow +/// caching system. It receives data file assignments from the head node and +/// serves query results for specific row ranges. +/// +/// # Architecture +/// +/// The worker service operates as part of a head-worker distributed system: +/// - Receives file assignments via Flight `do_put` operations +/// - Loads assigned data files into memory tables +/// - Serves query results via Flight `do_get` operations +/// - Maintains cached data for efficient retrieval +/// +/// # Flight Protocol Usage +/// +/// - **`do_put`**: Receives file assignments and row indexing information +/// - **`do_get`**: Serves query results for specific row ranges +/// - **`get_schema`**: Returns schema information for cached data +/// - Other Flight methods are not currently implemented +/// +/// # Data Flow +/// +/// ```text +/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +/// │ Head Node │───▶│ WorkerService │───▶│ DataLoader │ +/// │ (do_put) │ │ (FlightService)│ │ (load files) │ +/// └─────────────────┘ └─────────────────┘ └─────────────────┘ +/// │ │ +/// ▼ ▼ +/// ┌─────────────────┐ ┌─────────────────┐ +/// ┌─────────────────┐ │ Memory Table │ │ Query Engine │ +/// │ Client │◀───┤ 
(cached data) │◀───┤ (DataFusion) │ +/// │ (do_get) │ └─────────────────┘ └─────────────────┘ +/// └─────────────────┘ +/// ``` +/// +/// # Performance Considerations +/// +/// - Data is cached in memory for fast query response +/// - Supports streaming queries for large result sets +/// - Uses DataFusion's query engine for efficient processing +/// - Maintains global row indexing for distributed coordination +/// +/// # See Also +/// +/// - [`DataLoader`]: Loads assigned data files into memory tables +/// - [`IndexPair`]: Represents row range queries in tickets +pub struct WorkerService { + metadata_loc: String, + table_name: String, + schema_name: String, + ctx: Arc, +} + +#[tonic::async_trait] +impl FlightService for WorkerService { + type HandshakeStream = + Pin> + Send + 'static>>; + type ListFlightsStream = + Pin> + Send + 'static>>; + type DoGetStream = Pin> + Send + 'static>>; + type DoPutStream = Pin> + Send + 'static>>; + type DoExchangeStream = + Pin> + Send + 'static>>; + type DoActionStream = + Pin> + Send + 'static>>; + type ListActionsStream = + Pin> + Send + 'static>>; + + async fn get_schema( + &self, + _request: Request, + ) -> Result, Status> { + unimplemented!() + } + + /// Retrieves data for a specific row range from the cached table. + /// + /// This method serves query results for a specific row range specified in the + /// ticket. The ticket contains a serialized [`IndexPair`] with start and end + /// row indices for the requested data range. + /// + /// # Parameters + /// + /// - `request`: Flight ticket containing serialized [`IndexPair`] with row range + /// + /// # Returns + /// + /// Returns a Flight data stream containing the requested rows from the cached + /// table, excluding the `cache_index` column from the result set. + /// + /// # Algorithm + /// + /// 1. Deserialize the [`IndexPair`] from the ticket + /// 2. Execute SQL query to select rows in the specified range + /// 3. Exclude the `cache_index` column from results + /// 4. 
Encode results as Flight data stream
+    ///
+    /// # Errors
+    ///
+    /// - Returns `Status::internal` if ticket deserialization fails
+    /// - Returns `Status::internal` if SQL query execution fails
+    /// - Returns `Status::internal` if stream creation fails
+    ///
+    /// # Example Query
+    ///
+    /// ```sql
+    /// SELECT * EXCEPT(cache_index) FROM memtable
+    /// WHERE cache_index >= {start} AND cache_index <= {end}
+    /// ```
+    // NOTE(review): generic arguments appear to have been stripped from this listing
+    // (`Request,`, `Result, Status>`, `deserialize::(`); signatures in this impl are
+    // left exactly as found.
+    async fn do_get(
+        &self,
+        request: Request,
+    ) -> Result::DoGetStream>, Status> {
+        info!("querying worker");
+        let ticket = request.into_inner();
+        let pair = bincode::deserialize::(&ticket.ticket)
+            .map_err(|e| Status::internal(format!("Deserialization error: {}", e)))?;
+        info!("{:?}", pair);
+        let df = self.ctx.sql(format!("select * except(cache_index) from memtable where cache_index >= {} and cache_index <= {}", pair.start, pair.end).as_str()).await
+            .map_err(|e| Status::internal(format!("Error executing query: {}", e)))?;
+        let stream = df
+            .execute_stream()
+            .await
+            .map_err(|e| Status::internal(format!("Error creating stream: {}", e)))?;
+
+        let encoder = FlightDataEncoderBuilder::new()
+            .build(stream.map_err(|e| FlightError::ExternalError(Box::new(e))))
+            .map_err(Status::from);
+        Ok(Response::new(Box::pin(encoder)))
+    }
+
+    /// Not supported by the worker; always returns `Status::unimplemented`.
+    ///
+    /// Returning a status (instead of panicking) lets the client see a regular gRPC
+    /// `UNIMPLEMENTED` error.
+    async fn handshake(
+        &self,
+        _request: Request>,
+    ) -> Result, Status> {
+        Err(Status::unimplemented("handshake is not implemented"))
+    }
+
+    /// Not supported by the worker; always returns `Status::unimplemented`.
+    async fn list_flights(
+        &self,
+        _request: Request,
+    ) -> Result, Status> {
+        Err(Status::unimplemented("list_flights is not implemented"))
+    }
+
+    /// Not supported by the worker; always returns `Status::unimplemented`.
+    async fn get_flight_info(
+        &self,
+        _request: Request,
+    ) -> Result, Status> {
+        Err(Status::unimplemented("get_flight_info is not implemented"))
+    }
+
+    /// Not supported by the worker; always returns `Status::unimplemented`.
+    async fn poll_flight_info(
+        &self,
+        _request: Request,
+    ) -> Result, Status> {
+        Err(Status::unimplemented("poll_flight_info is not implemented"))
+    }
+
+    /// Receives file assignments and loads data into the worker's memory table.
+    ///
+    /// This method receives file assignments from the head node and loads the
+    /// specified data files into a memory table for caching. The request contains
+    /// file paths and row indexing information needed for distributed coordination.
+ /// + /// # Parameters + /// + /// - `request`: Flight data stream containing file assignment information + /// + /// # Expected Input Format + /// + /// The input RecordBatch must contain the following columns: + /// - `file_paths`: List - Array of file paths to load + /// - `row_start_indexes`: UInt64 - Starting row index for global ordering + /// + /// # Returns + /// + /// Returns a single [`PutResult`] indicating successful data loading. + /// + /// # Algorithm + /// + /// 1. Decode the incoming Flight data stream + /// 2. Extract file paths from the `file_paths` column + /// 3. Extract starting row index from `row_start_indexes` column + /// 4. Create a [`DataLoader`] with the assigned files + /// 5. Load data into the memory table named "memtable" + /// 6. Verify data loading with a test query + /// + /// # Error Handling + /// + /// - Returns `Status::internal` if no RecordBatch is received + /// - Returns `Status::internal` if required columns are missing + /// - Returns `Status::internal` if column types don't match expected format + /// - Returns `Status::internal` if data loading fails + /// + /// # Performance Considerations + /// + /// - Data is loaded into memory for fast subsequent queries + /// - Uses streaming to handle large file lists efficiently + /// - Maintains global row indexing for distributed coordination + /// + /// # See Also + /// + /// - [`DataLoader`]: Handles the actual file loading process + /// - [`do_get`]: Serves queries against the loaded data + async fn do_put( + &self, + request: Request>, + ) -> Result, Status> { + let record_batch = FlightRecordBatchStream::new_from_flight_data( + request.into_inner().map_err(|e| e.into()), + ) + .try_next() + .await + .map_err(|e| Status::internal(format!("Flight data error: {}", e)))? 
+ .ok_or_else(|| Status::internal("No record batch received"))?; + let file_paths_column = record_batch + .column_by_name("file_paths") + .ok_or_else(|| Status::internal("file_paths column not found"))?; + let start_indexes_column = record_batch + .column_by_name("row_start_indexes") + .ok_or_else(|| Status::internal("row_start_indexes column not found"))?; + + let file_urls; + if let DataType::List(field) = file_paths_column.data_type() { + if field.data_type() == &DataType::Utf8View { + let list_array = file_paths_column + .as_any() + .downcast_ref::() + .ok_or_else(|| Status::internal("Failed to downcast to ListArray"))?; + let values = list_array.values(); + let string_array = values + .as_any() + .downcast_ref::() + .ok_or_else(|| Status::internal("Failed to downcast to StringViewArray"))?; + + file_urls = string_array + .iter() + .map(|opt_str| opt_str.map(|s| s.to_string()).unwrap_or_default()) + .collect(); + } else { + return Err(Status::internal( + "Expected List, found List with different item type", + )); + } + } else { + return Err(Status::internal("Expected List DataType")); + } + info!("file_urls received in worker: {:?}", file_urls); + + let start_index = if let DataType::UInt64 = start_indexes_column.data_type() { + let list_array = start_indexes_column + .as_any() + .downcast_ref::() + .ok_or_else(|| Status::internal("Failed to downcast to UInt64Array"))?; + let values = list_array.values(); + *values + .first() + .ok_or_else(|| Status::internal("No start index found"))? 
+ } else { + return Err(Status::internal("Expected UInt64 DataType")); + }; + + info!("start_index received in worker: {:?}", start_index); + + let data_loader = DataLoader::new( + self.metadata_loc.clone(), + self.table_name.clone(), + self.schema_name.clone(), + file_urls, + start_index, + ) + .await + .map_err(|e| Status::internal(format!("Failed to create data loader: {}", e)))?; + data_loader + .load_data(&self.ctx.clone(), "memtable", start_index) + .await + .map_err(|e| Status::internal(format!("Failed to load data: {}", e)))?; + let df = self.ctx.sql(format!("select cache_index from memtable where cache_index >= {} and cache_index <= {}", start_index, start_index).as_str()) + .await.map_err(|e| Status::internal(format!("SQL error: {}", e)))? + .collect() + .await.map_err(|e| Status::internal(format!("Collection error: {}", e)))?; + info!("printing recordbatch"); + let _ = arrow::util::pretty::print_batches(&df); + + let app_metadata = Bytes::new(); + let result = PutResult { app_metadata }; + let stream = futures::stream::iter([Ok(result)]); + Ok(Response::new(stream.boxed())) + } + + async fn do_exchange( + &self, + _request: Request>, + ) -> Result, Status> { + todo!() + } + + async fn do_action( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } + + async fn list_actions( + &self, + _request: Request, + ) -> Result, Status> { + todo!() + } +} + +impl WorkerService { + #[allow(dead_code)] + pub fn new( + metadata_loc: String, + table_name: String, + schema_name: String, + ctx: Arc, + ) -> Self { + Self { + metadata_loc, + table_name, + schema_name, + ctx, + } + } +} + +pub async fn run( + host: &String, + port: &String, +) -> datafusion::common::Result<(), Box> { + let config = SessionConfig::new().with_batch_size(1024); + let ctx = Arc::new(SessionContext::new_with_config(config)); + let addr = format!("{host}:{port}").parse()?; + let dataset_config = + DatasetConfig::from_env().map_err(|e| format!("Failed to load dataset config: {}", 
e))?; + + let service = WorkerService { + metadata_loc: dataset_config.metadata_loc, + table_name: dataset_config.table_name, + schema_name: dataset_config.schema_name, + ctx: ctx.clone(), + }; + tonic::transport::Server::builder() + .add_service(FlightServiceServer::new(service)) + .serve(addr) + .await + .map_err(|e| format!("Error starting worker: {}", e))?; + Ok(()) +} + +//TODO: move to lib.rs +#[derive(Serialize, Deserialize, Debug)] +struct IndexPair { + start: u64, + end: u64, +}