diff --git a/.cargo/config.toml b/.cargo/config.toml deleted file mode 100644 index af951327f..000000000 --- a/.cargo/config.toml +++ /dev/null @@ -1,5 +0,0 @@ -[target.x86_64-apple-darwin] -rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] - -[target.aarch64-apple-darwin] -rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 411e60291..000000000 --- a/.dockerignore +++ /dev/null @@ -1,12 +0,0 @@ -.cargo -.github -.pytest_cache -ci -conda -dev -docs -examples -parquet -target -testing -venv \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 5600dab98..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: bug -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Additional context** -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index d9883dd45..000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: enhancement -assignees: '' - ---- - -**Is your feature request related to a problem or challenge? Please describe what you are trying to do.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
-(This section helps Arrow developers understand the context and *why* for this feature, in addition to the *what*) - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 4058e8a6e..000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,36 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -version: 2 -updates: - - - package-ecosystem: "cargo" - directory: "/" - schedule: - interval: "weekly" - day: "saturday" - open-pull-requests-limit: 20 - target-branch: main - - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" - day: "sunday" - open-pull-requests-limit: 20 - target-branch: main diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 18b90943f..000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,27 +0,0 @@ -# Which issue does this PR close? - - - -Closes #. - - # Rationale for this change - - -# What changes are included in this PR? - - -# Are there any user-facing changes? - - - \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 455a0dc1a..000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,534 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Reusable workflow for running building -# This ensures the same tests run for both debug (PRs) and release (main/tags) builds - -name: Build - -on: - workflow_call: - inputs: - build_mode: - description: 'Build mode: debug or release' - required: true - type: string - run_wheels: - description: 'Whether to build distribution wheels' - required: false - type: boolean - default: false - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: 1 - -jobs: - # ============================================ - # Linting Jobs - # ============================================ - lint-rust: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - with: - toolchain: "nightly" - components: rustfmt - - - name: Cache Cargo - uses: Swatinem/rust-cache@v2 - - - name: Check formatting - run: cargo +nightly fmt --all -- --check - - lint-python: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - uses: astral-sh/setup-uv@v6 - with: - enable-cache: true - - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - - name: Run Ruff - run: | - uv run --no-project ruff check --output-format=github python/ - uv run --no-project ruff format --check python/ - - - name: Run codespell - run: | - uv run --no-project codespell --toml pyproject.toml - - lint-toml: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Install taplo - uses: taiki-e/install-action@v2 - with: - tool: taplo-cli - - # if you encounter an error, try running 'taplo format' to fix the formatting automatically. 
- - name: Check Cargo.toml formatting - run: taplo format --check - - check-crates-patch: - if: inputs.build_mode == 'release' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - name: Ensure [patch.crates-io] is empty - run: python3 dev/check_crates_patch.py - - generate-license: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - uses: astral-sh/setup-uv@v6 - with: - enable-cache: true - - - name: Install cargo-license - uses: taiki-e/install-action@v2 - with: - tool: cargo-license - - - name: Generate license file - run: uv run --no-project python ./dev/create_license.py - - - uses: actions/upload-artifact@v6 - with: - name: python-wheel-license - path: LICENSE.txt - - # ============================================ - # Build - Linux x86_64 - # ============================================ - build-manylinux-x86_64: - needs: [generate-license, lint-rust, lint-python] - name: ManyLinux x86_64 - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . 
- - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - - - name: Cache Cargo - uses: Swatinem/rust-cache@v2 - with: - key: ${{ inputs.build_mode }} - - - uses: astral-sh/setup-uv@v6 - with: - enable-cache: true - - - name: Build (release mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'release' - with: - target: x86_64-unknown-linux-gnu - manylinux: "2_28" - args: --release --strip --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Build (debug mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'debug' - with: - target: x86_64-unknown-linux-gnu - manylinux: "2_28" - args: --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Build FFI test library - uses: PyO3/maturin-action@v1 - with: - target: x86_64-unknown-linux-gnu - manylinux: "2_28" - working-directory: examples/datafusion-ffi-example - args: --out dist - rustup-components: rust-std - - - name: Archive wheels - uses: actions/upload-artifact@v6 - with: - name: dist-manylinux-x86_64 - path: dist/* - - - name: Archive FFI test wheel - uses: actions/upload-artifact@v6 - with: - name: test-ffi-manylinux-x86_64 - path: examples/datafusion-ffi-example/dist/* - - # ============================================ - # Build - Linux ARM64 - # ============================================ - build-manylinux-aarch64: - needs: [generate-license, lint-rust, lint-python] - name: ManyLinux arm64 - runs-on: ubuntu-24.04-arm - steps: - - uses: actions/checkout@v6 - - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . 
- - - name: Setup Rust - uses: dtolnay/rust-toolchain@stable - - - name: Cache Cargo - uses: Swatinem/rust-cache@v2 - with: - key: ${{ inputs.build_mode }} - - - uses: astral-sh/setup-uv@v6 - with: - enable-cache: true - - - name: Build (release mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'release' - with: - target: aarch64-unknown-linux-gnu - manylinux: "2_28" - args: --release --strip --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Build (debug mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'debug' - with: - target: aarch64-unknown-linux-gnu - manylinux: "2_28" - args: --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Archive wheels - uses: actions/upload-artifact@v6 - if: inputs.build_mode == 'release' - with: - name: dist-manylinux-aarch64 - path: dist/* - - # ============================================ - # Build - macOS arm64 / Windows - # ============================================ - build-python-mac-win: - needs: [generate-license, lint-rust, lint-python] - name: macOS arm64 & Windows - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - python-version: ["3.10"] - os: [macos-latest, windows-latest] - steps: - - uses: actions/checkout@v6 - - - uses: dtolnay/rust-toolchain@stable - - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . - - - name: Cache Cargo - uses: Swatinem/rust-cache@v2 - with: - key: ${{ inputs.build_mode }} - - - uses: astral-sh/setup-uv@v7 - with: - enable-cache: true - - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - version: "27.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - # Run clippy BEFORE maturin so we can avoid rebuilding. The features must match - # exactly the features used by maturin. 
Linux maturin builds need to happen in a - # container so only run this for our mac runner. - - name: Run Clippy - if: matrix.os != 'windows-latest' - run: cargo clippy --no-deps --all-targets --features substrait -- -D warnings - - - name: Build Python package (release mode) - if: inputs.build_mode == 'release' - run: uv run --no-project maturin build --release --strip --features substrait - - - name: Build Python package (debug mode) - if: inputs.build_mode != 'release' - run: uv run --no-project maturin build --features substrait - - - name: List Windows wheels - if: matrix.os == 'windows-latest' - run: dir target\wheels\ - # since the runner is dynamic shellcheck (from actionlint) can't infer this is powershell - # so we specify it explicitly - shell: powershell - - - name: List Mac wheels - if: matrix.os != 'windows-latest' - run: find target/wheels/ - - - name: Archive wheels - uses: actions/upload-artifact@v6 - if: inputs.build_mode == 'release' - with: - name: dist-${{ matrix.os }} - path: target/wheels/* - - # ============================================ - # Build - macOS x86_64 (release only) - # ============================================ - build-macos-x86_64: - if: inputs.build_mode == 'release' - needs: [generate-license, lint-rust, lint-python] - runs-on: macos-15-intel - strategy: - fail-fast: false - matrix: - python-version: ["3.10"] - steps: - - uses: actions/checkout@v6 - - - uses: dtolnay/rust-toolchain@stable - - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . 
- - - name: Cache Cargo - uses: Swatinem/rust-cache@v2 - with: - key: ${{ inputs.build_mode }} - - - uses: astral-sh/setup-uv@v7 - with: - enable-cache: true - - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - version: "27.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Install dependencies - run: uv sync --dev --no-install-package datafusion - - - name: Build (release mode) - run: | - uv run --no-project maturin build --release --strip --features substrait - - - name: List Mac wheels - run: find target/wheels/ - - - name: Archive wheels - uses: actions/upload-artifact@v6 - with: - name: dist-macos-aarch64 - path: target/wheels/* - - # ============================================ - # Build - Source Distribution - # ============================================ - - build-sdist: - needs: [generate-license] - name: Source distribution - if: inputs.build_mode == 'release' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - run: rm LICENSE.txt - - name: Download LICENSE.txt - uses: actions/download-artifact@v7 - with: - name: python-wheel-license - path: . 
- - run: cat LICENSE.txt - - name: Build sdist - uses: PyO3/maturin-action@v1 - with: - rust-toolchain: stable - manylinux: auto - rustup-components: rust-std rustfmt - args: --release --sdist --out dist --features protoc,substrait - - name: Assert sdist build does not generate wheels - run: | - if [ "$(ls -A target/wheels)" ]; then - echo "Error: Sdist build generated wheels" - exit 1 - else - echo "Directory is clean" - fi - shell: bash - - # ============================================ - # Build - Source Distribution - # ============================================ - - merge-build-artifacts: - runs-on: ubuntu-latest - name: Merge build artifacts - if: inputs.build_mode == 'release' - needs: - - build-python-mac-win - - build-macos-x86_64 - - build-manylinux-x86_64 - - build-manylinux-aarch64 - - build-sdist - steps: - - name: Merge Build Artifacts - uses: actions/upload-artifact/merge@v6 - with: - name: dist - pattern: dist-* - - # ============================================ - # Build - Documentation - # ============================================ - # Documentation build job that runs after wheels are built - build-docs: - name: Build docs - runs-on: ubuntu-latest - needs: [build-manylinux-x86_64] # Only need the Linux wheel for docs - # Only run docs on main branch pushes, tags, or PRs - if: github.event_name == 'push' || github.event_name == 'pull_request' - steps: - - name: Set target branch - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - id: target-branch - run: | - set -x - if test '${{ github.ref }}' = 'refs/heads/main'; then - echo "value=asf-staging" >> "$GITHUB_OUTPUT" - elif test '${{ github.ref_type }}' = 'tag'; then - echo "value=asf-site" >> "$GITHUB_OUTPUT" - else - echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" - exit 1 - fi - - - name: Checkout docs sources - uses: actions/checkout@v6 - - - name: Checkout docs target branch - if: github.event_name == 'push' && 
(github.ref == 'refs/heads/main' || github.ref_type == 'tag') - uses: actions/checkout@v6 - with: - fetch-depth: 0 - ref: ${{ steps.target-branch.outputs.value }} - path: docs-target - - - name: Setup Python - uses: actions/setup-python@v6 - with: - python-version: "3.10" - - - name: Install dependencies - uses: astral-sh/setup-uv@v7 - with: - enable-cache: true - - # Download the Linux wheel built in the previous job - - name: Download pre-built Linux wheel - uses: actions/download-artifact@v7 - with: - name: dist-manylinux-x86_64 - path: wheels/ - - # Install from the pre-built wheels - - name: Install from pre-built wheels - run: | - set -x - uv venv - # Install documentation dependencies - uv sync --dev --no-install-package datafusion --group docs - # Install all pre-built wheels - WHEELS=$(find wheels/ -name "*.whl") - if [ -n "$WHEELS" ]; then - echo "Installing wheels:" - echo "$WHEELS" - uv pip install wheels/*.whl - else - echo "ERROR: No wheels found!" - exit 1 - fi - - - name: Build docs - run: | - set -x - cd docs - curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv - curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet - uv run --no-project make html - - - name: Copy & push the generated HTML - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - run: | - set -x - cd docs-target - # delete anything but: 1) '.'; 2) '..'; 3) .git/ - find ./ | grep -vE "^./$|^../$|^./.git" | xargs rm -rf - cp ../.asf.yaml . - cp -r ../docs/build/html/* . 
- git status --porcelain - if [ "$(git status --porcelain)" != "" ]; then - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add --all - git commit -m 'Publish built docs triggered by ${{ github.sha }}' - git push || git push --force - fi diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index ab284b522..000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# CI workflow for pull requests - runs tests in DEBUG mode for faster feedback - -name: CI - -on: - pull_request: - branches: ["main"] - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -jobs: - build: - uses: ./.github/workflows/build.yml - with: - build_mode: debug - run_wheels: false - secrets: inherit - - test: - needs: build - uses: ./.github/workflows/test.yml - secrets: inherit diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml deleted file mode 100644 index 2c8ecbc5e..000000000 --- a/.github/workflows/dev.yml +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Dev -on: [push, pull_request] - -jobs: - - rat: - name: Release Audit Tool (RAT) - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - name: Setup Python - uses: actions/setup-python@v6 - with: - python-version: "3.14" - - name: Audit licenses - run: ./dev/release/run-rat.sh . 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index bddc89eac..000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Release workflow - runs tests in RELEASE mode and builds distribution wheels -# Triggered on: -# - Merges to main -# - Release candidate tags (*-rc*) -# - Release tags (e.g., 45.0.0) - -name: Release Build - -on: - push: - branches: - - "main" - tags: - - "*-rc*" # Release candidates (e.g., 45.0.0-rc1) - - "[0-9]+.*" # Release tags (e.g., 45.0.0) - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -jobs: - build: - uses: ./.github/workflows/build.yml - with: - build_mode: release - run_wheels: true - secrets: inherit - - test: - needs: build - uses: ./.github/workflows/test.yml - secrets: inherit diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml deleted file mode 100644 index 86dc190ad..000000000 --- a/.github/workflows/take.yml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Assign the issue via a `take` comment -on: - issue_comment: - types: created - -permissions: - issues: write - -jobs: - issue_assign: - runs-on: ubuntu-latest - if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' - concurrency: - group: ${{ github.actor }}-issue-assign - steps: - - run: | - CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s) - if [ "$CODE" -eq "204" ] - then - echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees - else - echo "Cannot assign issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" - fi \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 692563019..000000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,133 +0,0 @@ -# Licensed to the Apache Software 
Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Reusable workflow for running tests -# This ensures the same tests run for both debug (PRs) and release (main/tags) builds - -name: Test - -on: - workflow_call: - -jobs: - test-matrix: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: - - "3.10" - - "3.11" - - "3.12" - - "3.13" - - "3.14" - toolchain: - - "stable" - - steps: - - uses: actions/checkout@v6 - - - name: Verify example datafusion version - run: | - MAIN_VERSION=$(grep -A 1 "name = \"datafusion-common\"" Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/') - EXAMPLE_VERSION=$(grep -A 1 "name = \"datafusion-common\"" examples/datafusion-ffi-example/Cargo.lock | grep "version = " | head -1 | sed 's/.*version = "\(.*\)"/\1/') - echo "Main crate datafusion version: $MAIN_VERSION" - echo "FFI example datafusion version: $EXAMPLE_VERSION" - - if [ "$MAIN_VERSION" != "$EXAMPLE_VERSION" ]; then - echo "❌ Error: FFI example datafusion versions don't match!" 
- exit 1 - fi - - - name: Setup Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - - name: Cache Cargo - uses: actions/cache@v5 - with: - path: ~/.cargo - key: cargo-cache-${{ matrix.toolchain }}-${{ hashFiles('Cargo.lock') }} - - - name: Install dependencies - uses: astral-sh/setup-uv@v7 - with: - enable-cache: true - - # Download the Linux wheel built in the build workflow - - name: Download pre-built Linux wheel - uses: actions/download-artifact@v7 - with: - name: dist-manylinux-x86_64 - path: wheels/ - - # Download the FFI test wheel - - name: Download pre-built FFI test wheel - uses: actions/download-artifact@v7 - with: - name: test-ffi-manylinux-x86_64 - path: wheels/ - - # Install from the pre-built wheels - - name: Install from pre-built wheels - run: | - set -x - uv venv - # Install development dependencies - uv sync --dev --no-install-package datafusion - # Install all pre-built wheels - WHEELS=$(find wheels/ -name "*.whl") - if [ -n "$WHEELS" ]; then - echo "Installing wheels:" - echo "$WHEELS" - uv pip install wheels/*.whl - else - echo "ERROR: No wheels found!" 
- exit 1 - fi - - - name: Run tests - env: - RUST_BACKTRACE: 1 - run: | - git submodule update --init - uv run --no-project pytest -v --import-mode=importlib - - - name: FFI unit tests - run: | - cd examples/datafusion-ffi-example - uv run --no-project pytest python/tests/_test*.py - - - name: Cache the generated dataset - id: cache-tpch-dataset - uses: actions/cache@v5 - with: - path: benchmarks/tpch/data - key: tpch-data-2.18.0 - - - name: Run dbgen to create 1 Gb dataset - if: ${{ steps.cache-tpch-dataset.outputs.cache-hit != 'true' }} - run: | - cd benchmarks/tpch - RUN_IN_CI=TRUE ./tpch-gen.sh 1 - - - name: Run TPC-H examples - run: | - cd examples/tpch - uv run --no-project python convert_data_to_parquet.py - uv run --no-project pytest _tests.py diff --git a/.github/workflows/verify-release-candidate.yml b/.github/workflows/verify-release-candidate.yml deleted file mode 100644 index a10a4faa9..000000000 --- a/.github/workflows/verify-release-candidate.yml +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Verify Release Candidate - -# NOTE: This workflow is intended to be run manually via workflow_dispatch. 
- -on: - workflow_dispatch: - inputs: - version: - description: Version number (e.g., 52.0.0) - required: true - type: string - rc_number: - description: Release candidate number (e.g., 0) - required: true - type: string - -concurrency: - group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} - cancel-in-progress: true - -jobs: - verify: - name: Verify RC (${{ matrix.os }}-${{ matrix.arch }}) - strategy: - fail-fast: false - matrix: - include: - # Linux - - os: linux - arch: x64 - runner: ubuntu-latest - - os: linux - arch: arm64 - runner: ubuntu-24.04-arm - - # macOS - - os: macos - arch: arm64 - runner: macos-latest - - os: macos - arch: x64 - runner: macos-15-intel - - # Windows - - os: windows - arch: x64 - runner: windows-latest - runs-on: ${{ matrix.runner }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Set up protoc - uses: arduino/setup-protoc@v3 - with: - version: "27.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Run release candidate verification - shell: bash - run: ./dev/release/verify-release-candidate.sh "${{ inputs.version }}" "${{ inputs.rc_number }}" diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 614d82327..000000000 --- a/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -target -/venv -.idea -/docs/temp -/docs/build -.DS_Store -.vscode - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# Python dist ignore -dist - -# C extensions -*.so - -# Python dist -dist - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -.python-version -venv -.venv - -apache-rat-*.jar -*rat.txt -.env -CHANGELOG.md.bak - -docs/mdbook/book - -.pyo3_build_config - diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a3b1b5157..000000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "testing"] - path = testing - url = 
https://github.com/apache/arrow-testing.git -[submodule "parquet"] - path = parquet - url = https://github.com/apache/parquet-testing.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 8ae6a4e32..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -repos: - - repo: https://github.com/rhysd/actionlint - rev: v1.7.6 - hooks: - - id: actionlint-docker - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.15.1 - hooks: - # Run the linter. - - id: ruff - # Run the formatter. - - id: ruff-format - - repo: local - hooks: - - id: rust-fmt - name: Rust fmt - description: Run cargo fmt on files included in the commit. rustfmt should be installed before-hand. - entry: cargo +nightly fmt --all -- - pass_filenames: true - types: [file, rust] - language: system - - id: rust-clippy - name: Rust clippy - description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. 
- entry: cargo clippy --all-targets --all-features -- -Dclippy::all -D warnings -Aclippy::redundant_closure - pass_filenames: false - types: [file, rust] - language: system - - - repo: https://github.com/codespell-project/codespell - rev: v2.4.1 - hooks: - - id: codespell - args: [ --toml, "pyproject.toml"] - additional_dependencies: - - tomli - -default_language_version: - python: python3 diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index ae40911d8..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,22 +0,0 @@ - - -# DataFusion Python Changelog - -The changelogs have now moved [here](./dev/changelog). diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 40b1ba7f1..000000000 --- a/Cargo.lock +++ /dev/null @@ -1,4770 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "abi_stable" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" -dependencies = [ - "abi_stable_derive", - "abi_stable_shared", - "const_panic", - "core_extensions", - "crossbeam-channel", - "generational-arena", - "libloading", - "lock_api", - "parking_lot", - "paste", - "repr_offset", - "rustc_version", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "abi_stable_derive" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" -dependencies = [ - "abi_stable_shared", - "as_derive_utils", - "core_extensions", - "proc-macro2", - "quote", - "rustc_version", - "syn 1.0.109", - "typed-arena", -] - -[[package]] -name = "abi_stable_shared" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" -dependencies = [ - "core_extensions", -] - 
-[[package]] -name = "adler2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" - -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "const-random", - "getrandom 0.3.4", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anyhow" -version = "1.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" - -[[package]] -name = "apache-avro" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" -dependencies = [ - "bigdecimal", - "bon", - "bzip2", - "crc32fast", - "digest", - "liblzma", - "log", - "miniz_oxide", - "num-bigint", - "quad-rand", - "rand", - "regex-lite", - "serde", - "serde_bytes", - "serde_json", - "snap", - "strum", - "strum_macros", - "thiserror", - "uuid", - "zstd", -] - -[[package]] -name = "ar_archive_writer" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" -dependencies = [ - "object", -] - -[[package]] -name = "arc-swap" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" -dependencies = [ - "rustversion", -] - -[[package]] -name = "arrayref" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" - -[[package]] -name = "arrayvec" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" - -[[package]] -name = "arrow" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-pyarrow", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - 
"num-traits", -] - -[[package]] -name = "arrow-array" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown 0.16.1", - "num-complex", - "num-integer", - "num-traits", -] - -[[package]] -name = "arrow-buffer" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" -dependencies = [ - "bytes", - "half", - "num-bigint", - "num-traits", -] - -[[package]] -name = "arrow-cast" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", - "atoi", - "base64", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num-traits", - "ryu", -] - -[[package]] -name = "arrow-csv" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" -dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num-integer", - "num-traits", -] - -[[package]] -name = "arrow-ipc" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" -dependencies = [ - "arrow-array", - "arrow-buffer", - 
"arrow-data", - "arrow-schema", - "arrow-select", - "flatbuffers", - "lz4_flex", - "zstd", -] - -[[package]] -name = "arrow-json" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "indexmap", - "itoa", - "lexical-core", - "memchr", - "num-traits", - "ryu", - "serde_core", - "serde_json", - "simdutf8", -] - -[[package]] -name = "arrow-ord" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", -] - -[[package]] -name = "arrow-pyarrow" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205437da4c0877c756c81bfe847a621d0a740cd00a155109d65510a1a62ebcd9" -dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", - "pyo3", -] - -[[package]] -name = "arrow-row" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", -] - -[[package]] -name = "arrow-schema" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" -dependencies = [ - "bitflags", - "serde_core", - "serde_json", -] - -[[package]] -name = "arrow-select" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - 
"arrow-schema", - "num-traits", -] - -[[package]] -name = "arrow-string" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "memchr", - "num-traits", - "regex", - "regex-syntax", -] - -[[package]] -name = "as_derive_utils" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" -dependencies = [ - "core_extensions", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "async-compression" -version = "0.4.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d67d43201f4d20c78bcda740c142ca52482d81da80681533d33bf3f0596c8e2" -dependencies = [ - "compression-codecs", - "compression-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "async-ffi" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" -dependencies = [ - "abi_stable", -] - -[[package]] -name = "async-recursion" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - 
"num-traits", -] - -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bigdecimal" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" -dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", -] - -[[package]] -name = "bitflags" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "cpufeatures", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bon" -version = "3.9.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" -dependencies = [ - "darling", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.117", -] - -[[package]] -name = "brotli" -version = "8.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bumpalo" -version = "3.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" - -[[package]] -name = "bzip2" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" -dependencies = [ - "libbz2-rs-sys", -] - -[[package]] -name = "cc" -version = "1.2.56" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" -dependencies = [ - "find-msvc-tools", - "jobserver", - "libc", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - -[[package]] -name = "chrono" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" -dependencies = [ - "iana-time-zone", - "num-traits", - "serde", - "windows-link", -] - -[[package]] -name = "chrono-tz" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" -dependencies = [ - "chrono", - "phf", -] - -[[package]] -name = "cmake" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" -dependencies = [ - "cc", -] - -[[package]] -name = "comfy-table" -version = "7.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" -dependencies = [ - "unicode-segmentation", - "unicode-width", -] - -[[package]] -name = "compression-codecs" -version = "0.4.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" -dependencies = [ - "bzip2", - "compression-core", - "flate2", - "liblzma", - "memchr", - "zstd", - "zstd-safe", -] - -[[package]] -name = "compression-core" 
-version = "0.4.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" - -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom 0.2.17", - "once_cell", - "tiny-keccak", -] - -[[package]] -name = "const_panic" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" -dependencies = [ - "typewit", -] - -[[package]] -name = "constant_time_eq" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" - -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "core_extensions" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bb5e5d0269fd4f739ea6cedaf29c16d81c27a7ce7582008e90eb50dcd57003" -dependencies = [ - "core_extensions_proc_macros", -] - -[[package]] -name = "core_extensions_proc_macros" -version = "1.5.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" - -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "crypto-common" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "cstr" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68523903c8ae5aacfa32a0d9ae60cadeb764e1da14ee0d26b1f3089f13a54636" -dependencies = [ - "proc-macro2", - "quote", -] - -[[package]] -name = "csv" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" -dependencies = [ - "csv-core", - "itoa", - "ryu", - 
"serde_core", -] - -[[package]] -name = "csv-core" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" -dependencies = [ - "memchr", -] - -[[package]] -name = "darling" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" -dependencies = [ - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.117", -] - -[[package]] -name = "darling_macro" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" -dependencies = [ - "darling_core", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "dashmap" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "datafusion" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-schema", - "async-trait", - "bytes", - "bzip2", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-arrow", - "datafusion-datasource-avro", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - 
"datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", - "flate2", - "futures", - "itertools", - "liblzma", - "log", - "object_store", - "parking_lot", - "parquet", - "rand", - "regex", - "sqlparser", - "tempfile", - "tokio", - "url", - "uuid", - "zstd", -] - -[[package]] -name = "datafusion-catalog" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "tokio", -] - -[[package]] -name = "datafusion-catalog-listing" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "futures", - "itertools", - "log", - "object_store", -] - -[[package]] -name = "datafusion-common" -version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "apache-avro", - "arrow", - "arrow-ipc", - "chrono", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "libc", - "log", - "object_store", - "parquet", - "paste", - "recursive", - "sqlparser", - "tokio", - "web-time", -] - -[[package]] -name = "datafusion-common-runtime" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "futures", - "log", - "tokio", -] - -[[package]] -name = "datafusion-datasource" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-compression", - "async-trait", - "bytes", - "bzip2", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "flate2", - "futures", - "glob", - "itertools", - "liblzma", - "log", - "object_store", - "rand", - "tokio", - "tokio-util", - "url", - "zstd", -] - -[[package]] -name = "datafusion-datasource-arrow" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-ipc", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "itertools", - "object_store", - "tokio", -] - -[[package]] -name = 
"datafusion-datasource-avro" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "apache-avro", - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-datasource", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "num-traits", - "object_store", -] - -[[package]] -name = "datafusion-datasource-csv" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "regex", - "tokio", -] - -[[package]] -name = "datafusion-datasource-json" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "serde_json", - "tokio", - "tokio-stream", -] - -[[package]] -name = "datafusion-datasource-parquet" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - 
"datafusion-functions-aggregate-common", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "datafusion-session", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "parquet", - "tokio", -] - -[[package]] -name = "datafusion-doc" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" - -[[package]] -name = "datafusion-execution" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-buffer", - "async-trait", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr-common", - "futures", - "log", - "object_store", - "parking_lot", - "rand", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", - "indexmap", - "itertools", - "paste", - "recursive", - "serde_json", - "sqlparser", -] - -[[package]] -name = "datafusion-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "indexmap", - "itertools", - "paste", -] - -[[package]] -name = "datafusion-ffi" -version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "abi_stable", - "arrow", - "arrow-schema", - "async-ffi", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-proto", - "datafusion-proto-common", - "datafusion-session", - "futures", - "log", - "prost", - "semver", - "tokio", -] - -[[package]] -name = "datafusion-functions" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "blake2", - "blake3", - "chrono", - "chrono-tz", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", - "hex", - "itertools", - "log", - "md-5", - "memchr", - "num-traits", - "rand", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "half", - "log", - "num-traits", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ 
- "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", -] - -[[package]] -name = "datafusion-functions-nested" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", - "hashbrown 0.16.1", - "itertools", - "itoa", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-table" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", - "paste", -] - -[[package]] -name = "datafusion-functions-window" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-window-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", -] - -[[package]] -name = "datafusion-macros" -version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "datafusion-doc", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "datafusion-optimizer" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "indexmap", - "itertools", - "log", - "recursive", - "regex", - "regex-syntax", -] - -[[package]] -name = "datafusion-physical-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "parking_lot", - "paste", - "petgraph", - "recursive", - "tokio", -] - -[[package]] -name = "datafusion-physical-expr-adapter" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "itertools", -] - -[[package]] -name = "datafusion-physical-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr-common", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "parking_lot", -] - -[[package]] -name = 
"datafusion-physical-optimizer" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "itertools", - "recursive", -] - -[[package]] -name = "datafusion-physical-plan" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema", - "async-trait", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "futures", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "log", - "num-traits", - "parking_lot", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "datafusion-proto" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-datasource", - "datafusion-datasource-arrow", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-table", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-proto-common", - "object_store", - "prost", - "rand", -] - -[[package]] -name = "datafusion-proto-common" -version = 
"53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "prost", -] - -[[package]] -name = "datafusion-pruning" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "itertools", - "log", -] - -[[package]] -name = "datafusion-python" -version = "52.0.0" -dependencies = [ - "arrow", - "arrow-select", - "async-trait", - "cstr", - "datafusion", - "datafusion-ffi", - "datafusion-proto", - "datafusion-substrait", - "futures", - "log", - "mimalloc", - "object_store", - "parking_lot", - "prost", - "prost-types", - "pyo3", - "pyo3-async-runtimes", - "pyo3-build-config", - "pyo3-log", - "serde_json", - "tokio", - "url", - "uuid", -] - -[[package]] -name = "datafusion-session" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", -] - -[[package]] -name = "datafusion-sql" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "bigdecimal", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-functions-nested", - "indexmap", - "log", - "recursive", - "regex", - "sqlparser", -] - -[[package]] -name = "datafusion-substrait" -version = "53.0.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "async-recursion", - "async-trait", - "chrono", - "datafusion", - "half", - "itertools", - "object_store", - "pbjson-types", - "prost", - "substrait", - "tokio", - "url", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "dyn-clone" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" -dependencies = [ - "libc", - "windows-sys 0.61.2", -] - -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - -[[package]] -name = "flatbuffers" -version = "25.12.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" -dependencies = [ - "bitflags", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" -dependencies = [ - "crc32fast", - "miniz_oxide", - "zlib-rs", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - -[[package]] -name = "foldhash" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" - -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - 
"futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" - -[[package]] -name = "futures-executor" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" - -[[package]] -name = "futures-macro" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "futures-sink" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" - -[[package]] -name = "futures-task" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" - -[[package]] -name = "futures-util" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - 
"futures-task", - "memchr", - "pin-project-lite", - "slab", -] - -[[package]] -name = "generational-arena" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi", - "wasm-bindgen", -] - -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "r-efi", - "wasip2", - "wasm-bindgen", -] - -[[package]] -name = "getrandom" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", - "wasip3", -] - -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - -[[package]] -name = "h2" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = 
"half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "num-traits", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" - -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "foldhash 0.1.5", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash 0.2.0", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "http" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" -dependencies = [ - "bytes", - "itoa", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http", -] - -[[package]] -name = "http-body-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" -dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "humantime" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" - -[[package]] -name = "hyper" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" -dependencies = [ - "atomic-waker", - "bytes", - "futures-channel", - "futures-core", - "h2", - "http", - "http-body", - "httparse", - "itoa", - "pin-project-lite", - "pin-utils", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.27.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" -dependencies = [ - "http", - "hyper", - "hyper-util", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower-service", -] - -[[package]] -name = "hyper-util" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" -dependencies = [ - "base64", - "bytes", - "futures-channel", - "futures-util", - "http", - "http-body", - "hyper", - "ipnet", - "libc", - "percent-encoding", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" 
-dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "icu_collections" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" - -[[package]] -name = "icu_properties" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.1.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" - -[[package]] -name = "icu_provider" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown 0.16.1", - "serde", - "serde_core", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "ipnet" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" - -[[package]] -name = "iri-string" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.4", - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "leb128fmt" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" - -[[package]] -name = "lexical-core" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" -dependencies = [ - "lexical-parse-integer", - "lexical-util", -] - -[[package]] -name = "lexical-parse-integer" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" -dependencies = [ - "lexical-util", -] - -[[package]] -name = "lexical-util" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" - -[[package]] -name = "lexical-write-float" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" -dependencies = [ - "lexical-util", - "lexical-write-integer", -] - -[[package]] -name = "lexical-write-integer" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" -dependencies = [ - "lexical-util", -] - -[[package]] -name = "libbz2-rs-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" - -[[package]] -name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "liblzma" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6033b77c21d1f56deeae8014eb9fbe7bdf1765185a6c508b5ca82eeaed7f899" -dependencies = [ - "liblzma-sys", -] - 
-[[package]] -name = "liblzma-sys" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "libm" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "linux-raw-sys" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" - -[[package]] -name = "litemap" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" - -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "lru-slab" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" - -[[package]] -name = "lz4_flex" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "md-5" -version = 
"0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "mimalloc" -version = "0.1.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "miniz_oxide" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", - "simd-adler32", -] - -[[package]] -name = "mio" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" -dependencies = [ - "libc", - "wasi", - "windows-sys 0.61.2", -] - -[[package]] -name = "multimap" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", - "serde", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - -[[package]] -name = "object_store" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" -dependencies = [ - "async-trait", - "base64", - "bytes", - "chrono", - "form_urlencoded", - "futures", - "http", - "http-body-util", - "httparse", - "humantime", - "hyper", - "itertools", - "md-5", - "parking_lot", - "percent-encoding", - "quick-xml", - "rand", - "reqwest", - "ring", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "thiserror", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "openssl-probe" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.12.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-link", -] - -[[package]] -name = "parquet" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.16.1", - "lz4_flex", - "num-bigint", - "num-integer", - "num-traits", - "object_store", - "paste", - "seq-macro", - "simdutf8", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pbjson" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" -dependencies = [ - "base64", - "serde", -] - -[[package]] -name = "pbjson-build" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" -dependencies = [ - "heck", - "itertools", - "prost", - "prost-types", -] - -[[package]] -name = "pbjson-types" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" -dependencies = [ - "bytes", - "chrono", - "pbjson", - "pbjson-build", - "prost", - "prost-build", - "serde", -] - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "petgraph" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" -dependencies = [ - "fixedbitset", - "hashbrown 0.15.5", - "indexmap", - "serde", -] - -[[package]] -name = "phf" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "potential_utf" -version = 
"0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" -dependencies = [ - "zerovec", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn 2.0.117", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" -dependencies = [ - "heck", - "itertools", - "log", - "multimap", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn 2.0.117", - "tempfile", -] - -[[package]] -name = "prost-derive" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "prost-types" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" -dependencies = [ - "prost", -] - -[[package]] -name = "protobuf-src" -version = "2.1.1+27.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6217c3504da19b85a3a4b2e9a5183d635822d83507ba0986624b5c05b83bfc40" -dependencies = [ - "cmake", -] - -[[package]] -name = "psm" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" -dependencies = [ - "ar_archive_writer", - "cc", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-async-runtimes" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" -dependencies = [ - "futures-channel", - "futures-util", - "once_cell", - "pin-project-lite", - "pyo3", - "tokio", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] 
-name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "quad-rand" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" - -[[package]] -name = "quick-xml" -version = "0.38.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "quinn" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" -dependencies = [ - "bytes", - "cfg_aliases", - "pin-project-lite", - "quinn-proto", - "quinn-udp", - "rustc-hash", - "rustls", - "socket2", - "thiserror", - "tokio", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-proto" -version = "0.11.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" -dependencies = [ - "bytes", - "getrandom 0.3.4", - "lru-slab", - "rand", - "ring", - "rustc-hash", - "rustls", - "rustls-pki-types", - "slab", - "thiserror", - "tinyvec", - "tracing", - "web-time", -] - -[[package]] -name = "quinn-udp" -version = "0.5.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" -dependencies = [ - "cfg_aliases", - "libc", - "once_cell", - "socket2", - "tracing", - "windows-sys 0.60.2", -] - -[[package]] -name = "quote" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" -dependencies = [ - "getrandom 0.3.4", -] - -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.117", -] - -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-lite" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - -[[package]] -name = "regress" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" -dependencies = [ - "hashbrown 0.16.1", - "memchr", -] - -[[package]] -name = "repr_offset" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" -dependencies = [ - "tstr", -] - -[[package]] -name = "reqwest" -version = "0.12.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" -dependencies = [ - "base64", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-rustls", - "hyper-util", - 
"js-sys", - "log", - "percent-encoding", - "pin-project-lite", - "quinn", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-rustls", - "tokio-util", - "tower", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", -] - -[[package]] -name = "ring" -version = "0.17.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" -dependencies = [ - "cc", - "cfg-if", - "getrandom 0.2.17", - "libc", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.61.2", -] - -[[package]] -name = "rustls" -version = "0.23.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" -dependencies = [ - "once_cell", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-native-certs" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" -dependencies = [ - "openssl-probe", - "rustls-pki-types", - 
"schannel", - "security-framework", -] - -[[package]] -name = "rustls-pki-types" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" -dependencies = [ - "web-time", - "zeroize", -] - -[[package]] -name = "rustls-webpki" -version = "0.103.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "ryu" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "schannel" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "schemars" -version = "0.8.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" -dependencies = [ - "dyn-clone", - "schemars_derive", - "serde", - "serde_json", -] - -[[package]] -name = "schemars_derive" -version = "0.8.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" -dependencies = [ - "proc-macro2", - "quote", - 
"serde_derive_internals", - "syn 2.0.117", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "security-framework" -version = "3.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" -dependencies = [ - "bitflags", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "semver" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" -dependencies = [ - "serde", - "serde_core", -] - -[[package]] -name = "seq-macro" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_bytes" -version = "0.11.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" -dependencies = [ - "serde", - "serde_core", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 
-dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "serde_derive_internals" -version = "0.29.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_tokenstream" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" -dependencies = [ - "proc-macro2", - "quote", - "serde", - "syn 2.0.117", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.9.34+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" -dependencies = [ - "indexmap", - "itoa", - "ryu", - "serde", - "unsafe-libyaml", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - 
-[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "simd-adler32" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" - -[[package]] -name = "simdutf8" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "slab" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "socket2" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" -dependencies = [ - "libc", - "windows-sys 0.60.2", -] - -[[package]] -name = "sqlparser" -version = "0.61.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" -dependencies = [ - "log", - "recursive", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - -[[package]] -name = "stacker" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "strum" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" - -[[package]] -name = "strum_macros" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "substrait" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" -dependencies = [ - "heck", - "pbjson", - "pbjson-build", - "pbjson-types", - "prettyplease", - "prost", - "prost-build", - "prost-types", - "protobuf-src", - "regress", - "schemars", - "semver", - "serde", - "serde_json", - "serde_yaml", - "syn 2.0.117", - "typify", - "walkdir", -] - -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -dependencies = [ - "futures-core", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "tempfile" -version = "3.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" -dependencies = [ - "fastrand", - "getrandom 0.4.1", - "once_cell", - "rustix", - "windows-sys 0.61.2", -] - -[[package]] -name = "thiserror" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinystr" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" -dependencies = [ - "displaydoc", - "zerovec", -] - -[[package]] -name = "tinyvec" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" -dependencies = [ - "bytes", - "libc", - "mio", - "pin-project-lite", - "socket2", - "tokio-macros", - "windows-sys 0.61.2", -] - -[[package]] -name = "tokio-macros" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "tokio-rustls" -version = "0.26.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" -dependencies = [ - "rustls", - "tokio", -] - -[[package]] -name = "tokio-stream" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", - "tokio-util", -] - -[[package]] -name = "tokio-util" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tower" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" -dependencies = [ - "futures-core", - "futures-util", - "pin-project-lite", - "sync_wrapper", - "tokio", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-http" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" -dependencies = [ - "bitflags", - "bytes", - "futures-util", - "http", - "http-body", - "iri-string", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - -[[package]] -name = "tracing" -version = "0.1.44" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "tracing-core" -version = "0.1.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" -dependencies = [ - "once_cell", -] - -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "tstr" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" -dependencies = [ - "tstr_proc_macros", -] - -[[package]] -name = "tstr_proc_macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" - -[[package]] -name = "twox-hash" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" - -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - -[[package]] -name = "typenum" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" - -[[package]] -name = 
"typewit" -version = "1.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" - -[[package]] -name = "typify" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" -dependencies = [ - "typify-impl", - "typify-macro", -] - -[[package]] -name = "typify-impl" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" -dependencies = [ - "heck", - "log", - "proc-macro2", - "quote", - "regress", - "schemars", - "semver", - "serde", - "serde_json", - "syn 2.0.117", - "thiserror", - "unicode-ident", -] - -[[package]] -name = "typify-macro" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" -dependencies = [ - "proc-macro2", - "quote", - "schemars", - "semver", - "serde", - "serde_json", - "serde_tokenstream", - "syn 2.0.117", - "typify-impl", -] - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - 
-[[package]] -name = "unsafe-libyaml" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "uuid" -version = "1.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" -dependencies = [ - "getrandom 0.4.1", - "js-sys", - "serde_core", - "wasm-bindgen", -] - -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasip2" -version = "1.0.2+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" -dependencies = [ - "wit-bindgen", -] - -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" -dependencies = [ - "cfg-if", - "futures-util", - "js-sys", - "once_cell", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn 2.0.117", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasm-streams" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - 
"winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" 
-dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - 
"windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - -[[package]] -name = 
"windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] - -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn 2.0.117", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn 2.0.117", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - -[[package]] -name = "writeable" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" - -[[package]] -name = "yoke" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "synstructure", -] - -[[package]] -name = "zeroize" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" - -[[package]] -name = "zerotrie" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "zlib-rs" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" - -[[package]] -name = "zstd" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.16+zstd.1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index b584470d6..000000000 --- a/Cargo.toml +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-python" -version = "52.0.0" -homepage = "https://datafusion.apache.org/python" -repository = "https://github.com/apache/datafusion-python" -authors = ["Apache DataFusion "] -description = "Apache DataFusion DataFrame and SQL Query Engine" -readme = "README.md" -license = "Apache-2.0" -edition = "2024" -rust-version = "1.88" -include = [ - "/src", - "/datafusion", - "/LICENSE.txt", - "build.rs", - "pyproject.toml", - "Cargo.toml", - "Cargo.lock", -] - -[features] -default = ["mimalloc"] -protoc = ["datafusion-substrait/protoc"] -substrait = ["dep:datafusion-substrait"] - -[dependencies] -tokio = { version = "1.49", features = [ - "macros", - "rt", - "rt-multi-thread", - "sync", -] } -pyo3 = { version = "0.28", features = [ - "extension-module", - "abi3", - "abi3-py310", -] } -pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] } -pyo3-log = "0.13.3" -arrow = { version = "58", features = ["pyarrow"] } -arrow-select = { version = "58" } -datafusion = { version = "53", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "53", optional = true } -datafusion-proto = { version = "53" } -datafusion-ffi = { version = "53" } -prost = "0.14.3" # keep in line with `datafusion-substrait` -serde_json = "1" -uuid = { version = "1.21", features = ["v4"] } -mimalloc = { version = "0.1", optional = true, default-features = false, 
features = [ - "local_dynamic_tls", -] } -async-trait = "0.1.89" -futures = "0.3" -cstr = "0.2" -object_store = { version = "0.13.1", features = [ - "aws", - "gcp", - "azure", - "http", -] } -url = "2" -log = "0.4.29" -parking_lot = "0.12" - -[build-dependencies] -prost-types = "0.14.3" # keep in line with `datafusion-substrait` -pyo3-build-config = "0.28" - -[lib] -name = "datafusion_python" -crate-type = ["cdylib", "rlib"] - -[profile.release] -lto = true -codegen-units = 1 - -# We cannot publish to crates.io with any patches in the below section. Developers -# must remove any entries in this section before creating a release candidate. -[patch.crates-io] -datafusion = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index d64569567..000000000 --- a/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/README.md b/README.md deleted file mode 100644 index 810ac8710..000000000 --- a/README.md +++ /dev/null @@ -1,312 +0,0 @@ - - -# DataFusion in Python - -[![Python test](https://github.com/apache/datafusion-python/actions/workflows/test.yaml/badge.svg)](https://github.com/apache/datafusion-python/actions/workflows/test.yaml) -[![Python Release Build](https://github.com/apache/datafusion-python/actions/workflows/build.yml/badge.svg)](https://github.com/apache/datafusion-python/actions/workflows/build.yml) - -This is a Python library that binds to [Apache Arrow](https://arrow.apache.org/) in-memory query engine [DataFusion](https://github.com/apache/datafusion). - -DataFusion's Python bindings can be used as a foundation for building new data systems in Python. Here are some examples: - -- [Dask SQL](https://github.com/dask-contrib/dask-sql) uses DataFusion's Python bindings for SQL parsing, query - planning, and logical plan optimizations, and then transpiles the logical plan to Dask operations for execution. -- [DataFusion Ballista](https://github.com/apache/datafusion-ballista) is a distributed SQL query engine that extends - DataFusion's Python bindings for distributed use cases. -- [DataFusion Ray](https://github.com/apache/datafusion-ray) is another distributed query engine that uses - DataFusion's Python bindings. 
- -## Features - -- Execute queries using SQL or DataFrames against CSV, Parquet, and JSON data sources. -- Queries are optimized using DataFusion's query optimizer. -- Execute user-defined Python code from SQL. -- Exchange data with Pandas and other DataFrame libraries that support PyArrow. -- Serialize and deserialize query plans in Substrait format. -- Experimental support for transpiling SQL queries to DataFrame calls with Polars, Pandas, and cuDF. - -For tips on tuning parallelism, see -[Maximizing CPU Usage](docs/source/user-guide/configuration.rst#maximizing-cpu-usage) -in the configuration guide. - -## Example Usage - -The following example demonstrates running a SQL query against a Parquet file using DataFusion, storing the results -in a Pandas DataFrame, and then plotting a chart. - -The Parquet file used in this example can be downloaded from the following page: - -- https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page - -```python -from datafusion import SessionContext - -# Create a DataFusion context -ctx = SessionContext() - -# Register table with context -ctx.register_parquet('taxi', 'yellow_tripdata_2021-01.parquet') - -# Execute SQL -df = ctx.sql("select passenger_count, count(*) " - "from taxi " - "where passenger_count is not null " - "group by passenger_count " - "order by passenger_count") - -# convert to Pandas -pandas_df = df.to_pandas() - -# create a chart -fig = pandas_df.plot(kind="bar", title="Trip Count by Number of Passengers").get_figure() -fig.savefig('chart.png') -``` - -This produces the following chart: - -![Chart](examples/chart.png) - -## Registering a DataFrame as a View - -You can use SessionContext's `register_view` method to convert a DataFrame into a view and register it with the context. 
- -```python -from datafusion import SessionContext, col, literal - -# Create a DataFusion context -ctx = SessionContext() - -# Create sample data -data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} - -# Create a DataFrame from the dictionary -df = ctx.from_pydict(data, "my_table") - -# Filter the DataFrame (for example, keep rows where a > 2) -df_filtered = df.filter(col("a") > literal(2)) - -# Register the dataframe as a view with the context -ctx.register_view("view1", df_filtered) - -# Now run a SQL query against the registered view -df_view = ctx.sql("SELECT * FROM view1") - -# Collect the results -results = df_view.collect() - -# Convert results to a list of dictionaries for display -result_dicts = [batch.to_pydict() for batch in results] - -print(result_dicts) -``` - -This will output: - -```python -[{'a': [3, 4, 5], 'b': [30, 40, 50]}] -``` - -## Configuration - -It is possible to configure runtime (memory and disk settings) and configuration settings when creating a context. - -```python -runtime = ( - RuntimeEnvBuilder() - .with_disk_manager_os() - .with_fair_spill_pool(10000000) -) -config = ( - SessionConfig() - .with_create_default_catalog_and_schema(True) - .with_default_catalog_and_schema("foo", "bar") - .with_target_partitions(8) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - .with_parquet_pruning(False) - .set("datafusion.execution.parquet.pushdown_filters", "true") -) -ctx = SessionContext(config, runtime) -``` - -Refer to the [API documentation](https://arrow.apache.org/datafusion-python/#api-reference) for more information. - -Printing the context will show the current configuration settings. - -```python -print(ctx) -``` - -## Extensions - -For information about how to extend DataFusion Python, please see the extensions page of the -[online documentation](https://datafusion.apache.org/python/). 
- -## More Examples - -See [examples](examples/README.md) for more information. - -### Executing Queries with DataFusion - -- [Query a Parquet file using SQL](https://github.com/apache/datafusion-python/blob/main/examples/sql-parquet.py) -- [Query a Parquet file using the DataFrame API](https://github.com/apache/datafusion-python/blob/main/examples/dataframe-parquet.py) -- [Run a SQL query and store the results in a Pandas DataFrame](https://github.com/apache/datafusion-python/blob/main/examples/sql-to-pandas.py) -- [Run a SQL query with a Python user-defined function (UDF)](https://github.com/apache/datafusion-python/blob/main/examples/sql-using-python-udf.py) -- [Run a SQL query with a Python user-defined aggregation function (UDAF)](https://github.com/apache/datafusion-python/blob/main/examples/sql-using-python-udaf.py) -- [Query PyArrow Data](https://github.com/apache/datafusion-python/blob/main/examples/query-pyarrow-data.py) -- [Create dataframe](https://github.com/apache/datafusion-python/blob/main/examples/import.py) -- [Export dataframe](https://github.com/apache/datafusion-python/blob/main/examples/export.py) - -### Running User-Defined Python Code - -- [Register a Python UDF with DataFusion](https://github.com/apache/datafusion-python/blob/main/examples/python-udf.py) -- [Register a Python UDAF with DataFusion](https://github.com/apache/datafusion-python/blob/main/examples/python-udaf.py) - -### Substrait Support - -- [Serialize query plans using Substrait](https://github.com/apache/datafusion-python/blob/main/examples/substrait.py) - -## How to install - -### uv - -```bash -uv add datafusion -``` - -### Pip - -```bash -pip install datafusion -# or -python -m pip install datafusion -``` - -### Conda - -```bash -conda install -c conda-forge datafusion -``` - -You can verify the installation by running: - -```python ->>> import datafusion ->>> datafusion.__version__ -'0.6.0' -``` - -## How to develop - -This assumes that you have rust and cargo installed. 
We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). The Maturin tools used in this workflow can be installed either via `uv` or `pip`. Both approaches should offer the same experience. It is recommended to use `uv` since it has significant performance improvements -over `pip`. - -Currently for protobuf support either [protobuf](https://protobuf.dev/installation/) or cmake must be installed. - -Bootstrap (`uv`): - -By default `uv` will attempt to build the datafusion python package. For our development we prefer to build manually. This means -that when creating your virtual environment using `uv sync` you need to pass in the additional `--no-install-package datafusion` -and for `uv run` commands the additional parameter `--no-project` - -```bash -# fetch this repo -git clone git@github.com:apache/datafusion-python.git -# cd to the repo root -cd datafusion-python/ -# create the virtual environment -uv sync --dev --no-install-package datafusion -# activate the environment -source .venv/bin/activate -``` - -Bootstrap (`pip`): - -```bash -# fetch this repo -git clone git@github.com:apache/datafusion-python.git -# cd to the repo root -cd datafusion-python/ -# prepare development environment (used to build wheel / install in development) -python3 -m venv .venv -# activate the venv -source .venv/bin/activate -# update pip itself if necessary -python -m pip install -U pip -# install dependencies -python -m pip install -r pyproject.toml -``` - -The tests rely on test data in git submodules. 
- -```bash -git submodule update --init -``` - -Whenever rust code changes (your changes or via `git pull`): - -```bash -# make sure you activate the venv using "source venv/bin/activate" first -maturin develop --uv -python -m pytest -``` - -Alternatively if you are using `uv` you can do the following without -needing to activate the virtual environment: - -```bash -uv run --no-project maturin develop --uv -uv run --no-project pytest . -``` - -### Running & Installing pre-commit hooks - -`datafusion-python` takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce -the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the -developer but certainly helpful for keeping PRs clean and concise. - -Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in -your DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete -the commit if an offending lint is found allowing you to make changes locally before pushing. - -The pre-commit hooks can also be run adhoc without installing them by simply running `pre-commit run --all-files`. - -NOTE: the current `pre-commit` hooks require docker, and cmake. See note on protobuf above. - -## Running linters without using pre-commit - -There are scripts in `ci/scripts` for running Rust and Python linters. 
- -```shell -./ci/scripts/python_lint.sh -./ci/scripts/rust_clippy.sh -./ci/scripts/rust_fmt.sh -./ci/scripts/rust_toml_fmt.sh -``` - -## How to update dependencies - -To change test dependencies, change the `pyproject.toml` and run - -```bash -uv sync --dev --no-install-package datafusion -``` diff --git a/docs/source/images/jupyter_lab_df_view.png b/_images/jupyter_lab_df_view.png similarity index 100% rename from docs/source/images/jupyter_lab_df_view.png rename to _images/jupyter_lab_df_view.png diff --git a/_sources/autoapi/datafusion/catalog/index.rst.txt b/_sources/autoapi/datafusion/catalog/index.rst.txt new file mode 100644 index 000000000..f2c9776f3 --- /dev/null +++ b/_sources/autoapi/datafusion/catalog/index.rst.txt @@ -0,0 +1,384 @@ +datafusion.catalog +================== + +.. py:module:: datafusion.catalog + +.. autoapi-nested-parse:: + + Data catalog providers. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.catalog.Catalog + datafusion.catalog.CatalogList + datafusion.catalog.CatalogProvider + datafusion.catalog.CatalogProviderList + datafusion.catalog.Schema + datafusion.catalog.SchemaProvider + datafusion.catalog.Table + + +Module Contents +--------------- + +.. py:class:: Catalog(catalog: datafusion._internal.catalog.RawCatalog) + + DataFusion data catalog. + + This constructor is not typically called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the catalog. + + + + .. py:method:: database(name: str = 'public') -> Schema + + Returns the database with the given ``name`` from this catalog. + + + + .. py:method:: deregister_schema(name: str, cascade: bool = True) -> Schema | None + + Deregister a schema from this catalog. + + + + .. py:method:: memory_catalog(ctx: datafusion.SessionContext | None = None) -> Catalog + :staticmethod: + + + Create an in-memory catalog provider. + + + + .. py:method:: names() -> set[str] + + This is an alias for `schema_names`. + + + + .. 
py:method:: register_schema(name: str, schema: Schema | SchemaProvider | SchemaProviderExportable) -> Schema | None + + Register a schema with this catalog. + + + + .. py:method:: schema(name: str = 'public') -> Schema + + Returns the database with the given ``name`` from this catalog. + + + + .. py:method:: schema_names() -> set[str] + + Returns the list of schemas in this catalog. + + + + .. py:attribute:: catalog + + +.. py:class:: CatalogList(catalog_list: datafusion._internal.catalog.RawCatalogList) + + DataFusion data catalog list. + + This constructor is not typically called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the catalog list. + + + + .. py:method:: catalog(name: str = 'datafusion') -> Catalog + + Returns the catalog with the given ``name`` from this catalog list. + + + + .. py:method:: catalog_names() -> set[str] + + Returns the set of catalog names in this catalog list. + + + + .. py:method:: memory_catalog(ctx: datafusion.SessionContext | None = None) -> CatalogList + :staticmethod: + + + Create an in-memory catalog provider list. + + + + .. py:method:: names() -> set[str] + + This is an alias for `catalog_names`. + + + + .. py:method:: register_catalog(name: str, catalog: Catalog | CatalogProvider | CatalogProviderExportable) -> Catalog | None + + Register a catalog with this catalog list. + + + + .. py:attribute:: catalog_list + + +.. py:class:: CatalogProvider + + Bases: :py:obj:`abc.ABC` + + + Abstract class for defining a Python based Catalog Provider. + + + .. py:method:: deregister_schema(name: str, cascade: bool) -> None + + Remove a schema from this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + + :param name: The name of the schema to remove. + :param cascade: If true, deregister the tables within the schema. + + + + ..
py:method:: register_schema(name: str, schema: SchemaProviderExportable | SchemaProvider | Schema) -> None + + Add a schema to this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + + + + .. py:method:: schema(name: str) -> Schema | None + :abstractmethod: + + + Retrieve a specific schema from this catalog. + + + + .. py:method:: schema_names() -> set[str] + :abstractmethod: + + + Set of the names of all schemas in this catalog. + + + +.. py:class:: CatalogProviderList + + Bases: :py:obj:`abc.ABC` + + + Abstract class for defining a Python based Catalog Provider List. + + + .. py:method:: catalog(name: str) -> CatalogProviderExportable | CatalogProvider | Catalog | None + :abstractmethod: + + + Retrieve a specific catalog from this catalog list. + + + + .. py:method:: catalog_names() -> set[str] + :abstractmethod: + + + Set of the names of all catalogs in this catalog list. + + + + .. py:method:: register_catalog(name: str, catalog: CatalogProviderExportable | CatalogProvider | Catalog) -> None + + Add a catalog to this catalog list. + + This method is optional. If your catalog provides a fixed list of catalogs, you + do not need to implement this method. + + + +.. py:class:: Schema(schema: datafusion._internal.catalog.RawSchema) + + DataFusion Schema. + + This constructor is not typically called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the schema. + + + + .. py:method:: deregister_table(name: str) -> None + + Deregister a table provider from this schema. + + + + .. py:method:: memory_schema(ctx: datafusion.SessionContext | None = None) -> Schema + :staticmethod: + + + Create an in-memory schema provider. + + + + .. py:method:: names() -> set[str] + + This is an alias for `table_names`. + + + + .. 
py:method:: register_table(name: str, table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset) -> None + + Register a table in this schema. + + + + .. py:method:: table(name: str) -> Table + + Return the table with the given ``name`` from this schema. + + + + .. py:method:: table_exist(name: str) -> bool + + Determines if a table exists in this schema. + + + + .. py:method:: table_names() -> set[str] + + Returns the list of all tables in this schema. + + + + .. py:attribute:: _raw_schema + + +.. py:class:: SchemaProvider + + Bases: :py:obj:`abc.ABC` + + + Abstract class for defining a Python based Schema Provider. + + + .. py:method:: deregister_table(name: str, cascade: bool) -> None + + Remove a table from this schema. + + This method is optional. If your schema provides a fixed list of tables, you do + not need to implement this method. + + + + .. py:method:: owner_name() -> str | None + + Returns the owner of the schema. + + This is an optional method. The default return is None. + + + + .. py:method:: register_table(name: str, table: Table | datafusion.context.TableProviderExportable | Any) -> None + + Add a table to this schema. + + This method is optional. If your schema provides a fixed list of tables, you do + not need to implement this method. + + + + .. py:method:: table(name: str) -> Table | None + :abstractmethod: + + + Retrieve a specific table from this schema. + + + + .. py:method:: table_exist(name: str) -> bool + :abstractmethod: + + + Returns true if the table exists in this schema. + + + + .. py:method:: table_names() -> set[str] + :abstractmethod: + + + Set of the names of all tables in this schema. + + + +.. py:class:: Table(table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset, ctx: datafusion.SessionContext | None = None) + + A DataFusion table. 
+ + Internally we currently support the following types of tables: + + - Tables created using built-in DataFusion methods, such as + reading from CSV or Parquet + - pyarrow datasets + - DataFusion DataFrames, which will be converted into a view + - Externally provided tables implemented with the FFI PyCapsule + interface (advanced) + + Constructor. + + + .. py:method:: __repr__() -> str + + Print a string representation of the table. + + + + .. py:method:: from_dataset(dataset: pyarrow.dataset.Dataset) -> Table + :staticmethod: + + + Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`. + + + + .. py:attribute:: __slots__ + :value: ('_inner',) + + + + .. py:attribute:: _inner + + + .. py:property:: kind + :type: str + + + Returns the kind of table. + + + .. py:property:: schema + :type: pyarrow.Schema + + + Returns the schema associated with this table. + + diff --git a/_sources/autoapi/datafusion/context/index.rst.txt b/_sources/autoapi/datafusion/context/index.rst.txt new file mode 100644 index 000000000..f010a0103 --- /dev/null +++ b/_sources/autoapi/datafusion/context/index.rst.txt @@ -0,0 +1,953 @@ +datafusion.context +================== + +.. py:module:: datafusion.context + +.. autoapi-nested-parse:: + + Session Context and its associated configuration. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.context.ArrowArrayExportable + datafusion.context.ArrowStreamExportable + datafusion.context.RuntimeConfig + datafusion.context.RuntimeEnvBuilder + datafusion.context.SQLOptions + datafusion.context.SessionConfig + datafusion.context.SessionContext + datafusion.context.TableProviderExportable + + +Module Contents +--------------- + +.. py:class:: ArrowArrayExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface. + + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + + ..
py:method:: __arrow_c_array__(requested_schema: object | None = None) -> tuple[object, object] + + +.. py:class:: ArrowStreamExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface. + + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + + .. py:method:: __arrow_c_stream__(requested_schema: object | None = None) -> object + + +.. py:class:: RuntimeConfig + + Bases: :py:obj:`RuntimeEnvBuilder` + + + See `RuntimeEnvBuilder`. + + Create a new :py:class:`RuntimeEnvBuilder` with default values. + + +.. py:class:: RuntimeEnvBuilder + + Runtime configuration options. + + Create a new :py:class:`RuntimeEnvBuilder` with default values. + + + .. py:method:: with_disk_manager_disabled() -> RuntimeEnvBuilder + + Disable the disk manager, attempts to create temporary files will error. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_disk_manager_os() -> RuntimeEnvBuilder + + Use the operating system's temporary directory for disk manager. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_disk_manager_specified(*paths: str | pathlib.Path) -> RuntimeEnvBuilder + + Use the specified paths for the disk manager's temporary files. + + :param paths: Paths to use for the disk manager's temporary files. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_fair_spill_pool(size: int) -> RuntimeEnvBuilder + + Use a fair spill pool with the specified size. + + This pool works best when you know beforehand the query has multiple spillable + operators that will likely all need to spill. 
Sometimes it will cause spills + even when there was sufficient memory (reserved for other operators) to avoid + doing so:: + + ┌───────────────────────z──────────────────────z───────────────┐ + │ z z │ + │ z z │ + │ Spillable z Unspillable z Free │ + │ Memory z Memory z Memory │ + │ z z │ + │ z z │ + └───────────────────────z──────────────────────z───────────────┘ + + :param size: Size of the memory pool in bytes. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Examples usage:: + + config = RuntimeEnvBuilder().with_fair_spill_pool(1024) + + + + .. py:method:: with_greedy_memory_pool(size: int) -> RuntimeEnvBuilder + + Use a greedy memory pool with the specified size. + + This pool works well for queries that do not need to spill or have a single + spillable operator. See :py:func:`with_fair_spill_pool` if there are + multiple spillable operators that all will spill. + + :param size: Size of the memory pool in bytes. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Example usage:: + + config = RuntimeEnvBuilder().with_greedy_memory_pool(1024) + + + + .. py:method:: with_temp_file_path(path: str | pathlib.Path) -> RuntimeEnvBuilder + + Use the specified path to create any needed temporary files. + + :param path: Path to use for temporary files. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Example usage:: + + config = RuntimeEnvBuilder().with_temp_file_path("/tmp") + + + + .. py:method:: with_unbounded_memory_pool() -> RuntimeEnvBuilder + + Use an unbounded memory pool. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:attribute:: config_internal + + +.. py:class:: SQLOptions + + Options to be used when performing SQL queries. + + Create a new :py:class:`SQLOptions` with default values. 
+ + The default values are: + - DDL commands are allowed + - DML commands are allowed + - Statements are allowed + + + .. py:method:: with_allow_ddl(allow: bool = True) -> SQLOptions + + Should DDL (Data Definition Language) commands be run? + + Examples of DDL commands include ``CREATE TABLE`` and ``DROP TABLE``. + + :param allow: Allow DDL commands to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_ddl(True) + + + + .. py:method:: with_allow_dml(allow: bool = True) -> SQLOptions + + Should DML (Data Manipulation Language) commands be run? + + Examples of DML commands include ``INSERT INTO`` and ``DELETE``. + + :param allow: Allow DML commands to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_dml(True) + + + + .. py:method:: with_allow_statements(allow: bool = True) -> SQLOptions + + Should statements such as ``SET VARIABLE`` and ``BEGIN TRANSACTION`` be run? + + :param allow: Allow statements to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + :rtype: SQLOptions + + Example usage:: + + options = SQLOptions().with_allow_statements(True) + + + + .. py:attribute:: options_internal + + +.. py:class:: SessionConfig(config_options: dict[str, str] | None = None) + + Session configuration options. + + Create a new :py:class:`SessionConfig` with the given configuration options. + + :param config_options: Configuration options. + + + .. py:method:: set(key: str, value: str) -> SessionConfig + + Set a configuration option. + + Args: + key: Option key. + value: Option value. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_batch_size(batch_size: int) -> SessionConfig + + Customize batch size. + + :param batch_size: Batch size. + + :returns: A new :py:class:`SessionConfig` object with the updated setting.
+ + + + .. py:method:: with_create_default_catalog_and_schema(enabled: bool = True) -> SessionConfig + + Control if the default catalog and schema will be automatically created. + + :param enabled: Whether the default catalog and schema will be + automatically created. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_default_catalog_and_schema(catalog: str, schema: str) -> SessionConfig + + Select a name for the default catalog and schema. + + :param catalog: Catalog name. + :param schema: Schema name. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_information_schema(enabled: bool = True) -> SessionConfig + + Enable or disable the inclusion of ``information_schema`` virtual tables. + + :param enabled: Whether to include ``information_schema`` virtual tables. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_parquet_pruning(enabled: bool = True) -> SessionConfig + + Enable or disable the use of pruning predicate for parquet readers. + + Pruning predicates will enable the reader to skip row groups. + + :param enabled: Whether to use pruning predicate for parquet readers. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_aggregations(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for aggregations. + + Enabling this improves parallelism. + + :param enabled: Whether to use repartitioning for aggregations. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_file_min_size(size: int) -> SessionConfig + + Set minimum file range size for repartitioning scans. + + :param size: Minimum file range size. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. 
py:method:: with_repartition_file_scans(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for file scans. + + :param enabled: Whether to use repartitioning for file scans. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_joins(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for joins to improve parallelism. + + :param enabled: Whether to use repartitioning for joins. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_sorts(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for sorts. + + This may improve parallelism. + + :param enabled: Whether to use repartitioning for sorts. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_windows(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for window functions. + + This may improve parallelism. + + :param enabled: Whether to use repartitioning for window functions. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_target_partitions(target_partitions: int) -> SessionConfig + + Customize the number of target partitions for query execution. + + Increasing partitions can increase concurrency. + + :param target_partitions: Number of target partitions. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:attribute:: config_internal + + +.. py:class:: SessionContext(config: SessionConfig | None = None, runtime: RuntimeEnvBuilder | None = None) + + This is the main interface for executing queries and creating DataFrames. + + See :ref:`user_guide_concepts` in the online documentation for more information. + + Main interface for executing queries with DataFusion.
+ + Maintains the state of the connection + between a user and an instance of the DataFusion + engine. + + :param config: Session configuration options. + :param runtime: Runtime configuration options. + + Example usage: + + The following example demonstrates how to use the context to execute + a query against a CSV data source using the :py:class:`DataFrame` API:: + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_csv("data.csv") + + + .. py:method:: __datafusion_logical_extension_codec__() -> Any + + Access the PyCapsule FFI_LogicalExtensionCodec. + + + + .. py:method:: __datafusion_task_context_provider__() -> Any + + Access the PyCapsule FFI_TaskContextProvider. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Session Context. + + + + .. py:method:: _convert_file_sort_order(file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None) -> list[list[datafusion._internal.expr.SortExpr]] | None + :staticmethod: + + + Convert nested ``SortKey`` sequences into raw sort expressions. + + Each ``SortKey`` can be a column name string, an ``Expr``, or a + ``SortExpr`` and will be converted using + :func:`datafusion.expr.sort_list_to_raw_sort_list`. + + + + .. py:method:: _convert_table_partition_cols(table_partition_cols: list[tuple[str, str | pyarrow.DataType]]) -> list[tuple[str, pyarrow.DataType]] + :staticmethod: + + + + .. py:method:: catalog(name: str = 'datafusion') -> datafusion.catalog.Catalog + + Retrieve a catalog by name. + + + + .. py:method:: catalog_names() -> set[str] + + Returns the list of catalogs in this context. + + + + .. py:method:: create_dataframe(partitions: list[list[pyarrow.RecordBatch]], name: str | None = None, schema: pyarrow.Schema | None = None) -> datafusion.dataframe.DataFrame + + Create and return a dataframe using the provided partitions.
+ + :param partitions: :py:class:`pa.RecordBatch` partitions to register. + :param name: Resultant dataframe name. + :param schema: Schema for the partitions. + + :returns: DataFrame representation of the provided partitions. + + + + .. py:method:: create_dataframe_from_logical_plan(plan: datafusion.plan.LogicalPlan) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from an existing plan. + + :param plan: Logical plan. + + :returns: DataFrame representation of the logical plan. + + + + .. py:method:: deregister_table(name: str) -> None + + Remove a table from the session. + + + + .. py:method:: empty_table() -> datafusion.dataframe.DataFrame + + Create an empty :py:class:`~datafusion.dataframe.DataFrame`. + + + + .. py:method:: enable_url_table() -> SessionContext + + Control if local files can be queried as tables. + + :returns: A new :py:class:`SessionContext` object with url table enabled. + + + + .. py:method:: execute(plan: datafusion.plan.ExecutionPlan, partitions: int) -> datafusion.record_batch.RecordBatchStream + + Execute the ``plan`` and return the results. + + + + .. py:method:: from_arrow(data: ArrowStreamExportable | ArrowArrayExportable, name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source. + + The Arrow data source can be any object that implements either + ``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return + a struct array. + + Arrow data can be Polars, Pandas, Pyarrow etc. + + :param data: Arrow data source. + :param name: Name of the DataFrame. + + :returns: DataFrame representation of the Arrow table. + + + + .. py:method:: from_arrow_table(data: pyarrow.Table, name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. + + This is an alias for :py:func:`from_arrow`. + + + + .. 
py:method:: from_pandas(data: pandas.DataFrame, name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. + + :param data: Pandas DataFrame. + :param name: Name of the DataFrame. + + :returns: DataFrame representation of the Pandas DataFrame. + + + + .. py:method:: from_polars(data: polars.DataFrame, name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. + + :param data: Polars DataFrame. + :param name: Name of the DataFrame. + + :returns: DataFrame representation of the Polars DataFrame. + + + + .. py:method:: from_pydict(data: dict[str, list[Any]], name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from a dictionary. + + :param data: Dictionary of lists. + :param name: Name of the DataFrame. + + :returns: DataFrame representation of the dictionary of lists. + + + + .. py:method:: from_pylist(data: list[dict[str, Any]], name: str | None = None) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from a list. + + :param data: List of dictionaries. + :param name: Name of the DataFrame. + + :returns: DataFrame representation of the list of dictionaries. + + + + .. py:method:: global_ctx() -> SessionContext + :classmethod: + + + Retrieve the global context as a `SessionContext` wrapper. + + :returns: A `SessionContext` object that wraps the global `SessionContextInternal`. + + + + .. py:method:: read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') -> datafusion.dataframe.DataFrame + + Create a :py:class:`DataFrame` for reading Avro data source. + + :param path: Path to the Avro file. + :param schema: The data source schema. + :param file_partition_cols: Partition columns. 
+ :param file_extension: File extension to select. + + :returns: DataFrame representation of the read Avro file + + + + .. py:method:: read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) -> datafusion.dataframe.DataFrame + + Read a CSV data source. + + :param path: Path to the CSV file + :param schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + :param has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created. + :param delimiter: An optional column delimiter. + :param schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + :param options: Set advanced options for CSV reading. This cannot be + combined with any of the other options in this method. + + :returns: DataFrame representation of the read CSV files + + + + .. py:method:: read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) -> datafusion.dataframe.DataFrame + + Read a line-delimited JSON data source. + + :param path: Path to the JSON file. + :param schema: The data source schema. 
+ :param schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + + :returns: DataFrame representation of the read JSON files. + + + + .. py:method:: read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) -> datafusion.dataframe.DataFrame + + Read a Parquet source into a :py:class:`~datafusion.dataframe.DataFrame`. + + :param path: Path to the Parquet file. + :param table_partition_cols: Partition columns. + :param parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + :param schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + :param file_sort_order: Sort order for the file. Each sort key can be + specified as a column name (``str``), an expression + (``Expr``), or a ``SortExpr``. + + :returns: DataFrame representation of the read Parquet files + + + + .. py:method:: read_table(table: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) -> datafusion.dataframe.DataFrame + + Creates a :py:class:`~datafusion.dataframe.DataFrame` from a table. + + + + .. 
py:method:: register_avro(name: str, path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_extension: str = '.avro', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None) -> None + + Register an Avro file as a table. + + The registered table can be referenced from SQL statement executed against + this context. + + :param name: Name of the table to register. + :param path: Path to the Avro file. + :param schema: The data source schema. + :param file_extension: File extension to select. + :param table_partition_cols: Partition columns. + + + + .. py:method:: register_catalog_provider(name: str, provider: datafusion.catalog.CatalogProviderExportable | datafusion.catalog.CatalogProvider | datafusion.catalog.Catalog) -> None + + Register a catalog provider. + + + + .. py:method:: register_catalog_provider_list(provider: datafusion.catalog.CatalogProviderListExportable | datafusion.catalog.CatalogProviderList | datafusion.catalog.CatalogList) -> None + + Register a catalog provider list. + + + + .. py:method:: register_csv(name: str, path: str | pathlib.Path | list[str | pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) -> None + + Register a CSV file as a table. + + The registered table can be referenced from SQL statement executed against this context. + + :param name: Name of the table to register. + :param path: Path to the CSV file. It also accepts a list of Paths. + :param schema: An optional schema representing the CSV file. If None, the + CSV reader will try to infer it based on data in file. + :param has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created. + :param delimiter: An optional column delimiter. 
+ :param schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param file_compression_type: File compression type. + :param options: Set advanced options for CSV reading. This cannot be + combined with any of the other options in this method. + + + + .. py:method:: register_dataset(name: str, dataset: pyarrow.dataset.Dataset) -> None + + Register a :py:class:`pa.dataset.Dataset` as a table. + + :param name: Name of the table to register. + :param dataset: PyArrow dataset. + + + + .. py:method:: register_json(name: str, path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) -> None + + Register a JSON file as a table. + + The registered table can be referenced from SQL statement executed + against this context. + + :param name: Name of the table to register. + :param path: Path to the JSON file. + :param schema: The data source schema. + :param schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + + + + .. py:method:: register_listing_table(name: str, path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.parquet', schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) -> None + + Register multiple files as a single table. 
+ + Registers a :py:class:`~datafusion.catalog.Table` that can assemble multiple + files from locations in an :py:class:`~datafusion.object_store.ObjectStore` + instance. + + :param name: Name of the resultant table. + :param path: Path to the file to register. + :param table_partition_cols: Partition columns. + :param file_extension: File extension of the provided table. + :param schema: The data source schema. + :param file_sort_order: Sort order for the file. Each sort key can be + specified as a column name (``str``), an expression + (``Expr``), or a ``SortExpr``. + + + + .. py:method:: register_object_store(schema: str, store: Any, host: str | None = None) -> None + + Add a new object store into the session. + + :param schema: The data source schema. + :param store: The :py:class:`~datafusion.object_store.ObjectStore` to register. + :param host: URL for the host. + + + + .. py:method:: register_parquet(name: str, path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) -> None + + Register a Parquet file as a table. + + The registered table can be referenced from SQL statement executed + against this context. + + :param name: Name of the table to register. + :param path: Path to the Parquet file. + :param table_partition_cols: Partition columns. + :param parquet_pruning: Whether the parquet reader should use the + predicate to prune row groups. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + :param schema: The data source schema. 
+ :param file_sort_order: Sort order for the file. Each sort key can be + specified as a column name (``str``), an expression + (``Expr``), or a ``SortExpr``. + + + + .. py:method:: register_record_batches(name: str, partitions: list[list[pyarrow.RecordBatch]]) -> None + + Register record batches as a table. + + This function will convert the provided partitions into a table and + register it into the session using the given name. + + :param name: Name of the resultant table. + :param partitions: Record batches to register as a table. + + + + .. py:method:: register_table(name: str, table: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) -> None + + Register a :py:class:`~datafusion.Table` with this context. + + The registered table can be referenced from SQL statements executed against + this context. + + :param name: Name of the resultant table. + :param table: Any object that can be converted into a :class:`Table`. + + + + .. py:method:: register_table_provider(name: str, provider: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) -> None + + Register a table provider. + + Deprecated: use :meth:`register_table` instead. + + + + .. py:method:: register_udaf(udaf: datafusion.user_defined.AggregateUDF) -> None + + Register a user-defined aggregation function (UDAF) with the context. + + + + .. py:method:: register_udf(udf: datafusion.user_defined.ScalarUDF) -> None + + Register a user-defined function (UDF) with the context. + + + + .. py:method:: register_udtf(func: datafusion.user_defined.TableFunction) -> None + + Register a user defined table function. + + + + .. py:method:: register_udwf(udwf: datafusion.user_defined.WindowUDF) -> None + + Register a user-defined window function (UDWF) with the context. + + + + .. 
py:method:: register_view(name: str, df: datafusion.dataframe.DataFrame) -> None + + Register a :py:class:`~datafusion.dataframe.DataFrame` as a view. + + :param name: The name to register the view under. + :type name: str + :param df: The DataFrame to be converted into a view and registered. + :type df: DataFrame + + + + .. py:method:: session_id() -> str + + Return an id that uniquely identifies this :py:class:`SessionContext`. + + + + .. py:method:: sql(query: str, options: SQLOptions | None = None, param_values: dict[str, Any] | None = None, **named_params: Any) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.DataFrame` from SQL query text. + + See the online documentation for a description of how to perform + parameterized substitution via either the ``param_values`` option + or passing in ``named_params``. + + Note: This API implements DDL statements such as ``CREATE TABLE`` and + ``CREATE VIEW`` and DML statements such as ``INSERT INTO`` with in-memory + default implementation. See + :py:func:`~datafusion.context.SessionContext.sql_with_options`. + + :param query: SQL query text. + :param options: If provided, the query will be validated against these options. + :param param_values: Provides substitution of scalar values in the query + after parsing. + :param named_params: Provides string or DataFrame substitution in the query string. + + :returns: DataFrame representation of the SQL query. + + + + .. py:method:: sql_with_options(query: str, options: SQLOptions, param_values: dict[str, Any] | None = None, **named_params: Any) -> datafusion.dataframe.DataFrame + + Create a :py:class:`~datafusion.dataframe.DataFrame` from SQL query text. + + This function will first validate that the query is allowed by the + provided options. + + :param query: SQL query text. + :param options: SQL options. + :param param_values: Provides substitution of scalar values in the query + after parsing. 
+ :param named_params: Provides string or DataFrame substitution in the query string. + + :returns: DataFrame representation of the SQL query. + + + + .. py:method:: table(name: str) -> datafusion.dataframe.DataFrame + + Retrieve a previously registered table by name. + + + + .. py:method:: table_exist(name: str) -> bool + + Return whether a table with the given name exists. + + + + .. py:method:: with_logical_extension_codec(codec: Any) -> SessionContext + + Create a new session context with specified codec. + + This only supports codecs that have been implemented using the + FFI interface. + + + + .. py:attribute:: ctx + + +.. py:class:: TableProviderExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object that has __datafusion_table_provider__ PyCapsule. + + https://datafusion.apache.org/python/user-guide/io/table_provider.html + + + .. py:method:: __datafusion_table_provider__(session: Any) -> object + + diff --git a/_sources/autoapi/datafusion/dataframe/index.rst.txt b/_sources/autoapi/datafusion/dataframe/index.rst.txt new file mode 100644 index 000000000..0ac971060 --- /dev/null +++ b/_sources/autoapi/datafusion/dataframe/index.rst.txt @@ -0,0 +1,1162 @@ +datafusion.dataframe +==================== + +.. py:module:: datafusion.dataframe + +.. autoapi-nested-parse:: + + :py:class:`DataFrame` is one of the core concepts in DataFusion. + + See :ref:`user_guide_concepts` in the online documentation for more information. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.dataframe.Compression + datafusion.dataframe.DataFrame + datafusion.dataframe.DataFrameWriteOptions + datafusion.dataframe.InsertOp + datafusion.dataframe.ParquetColumnOptions + datafusion.dataframe.ParquetWriterOptions + + +Module Contents +--------------- + +.. py:class:: Compression + + Bases: :py:obj:`enum.Enum` + + + Enum representing the available compression types for Parquet files. + + + .. 
py:method:: from_str(value: str) -> Compression + :classmethod: + + + Convert a string to a Compression enum value. + + :param value: The string representation of the compression type. + + :returns: The Compression enum lowercase value. + + :raises ValueError: If the string does not match any Compression enum value. + + + + .. py:method:: get_default_level() -> int | None + + Get the default compression level for the compression type. + + :returns: The default compression level for the compression type. + + + + .. py:attribute:: BROTLI + :value: 'brotli' + + + + .. py:attribute:: GZIP + :value: 'gzip' + + + + .. py:attribute:: LZ4 + :value: 'lz4' + + + + .. py:attribute:: LZ4_RAW + :value: 'lz4_raw' + + + + .. py:attribute:: SNAPPY + :value: 'snappy' + + + + .. py:attribute:: UNCOMPRESSED + :value: 'uncompressed' + + + + .. py:attribute:: ZSTD + :value: 'zstd' + + + +.. py:class:: DataFrame(df: datafusion._internal.DataFrame) + + Two dimensional table representation of data. + + DataFrame objects are iterable; iterating over a DataFrame yields + :class:`datafusion.RecordBatch` instances lazily. + + See :ref:`user_guide_concepts` in the online documentation for more information. + + This constructor is not to be used by the end user. + + See :py:class:`~datafusion.context.SessionContext` for methods to + create a :py:class:`DataFrame`. + + + .. py:method:: __aiter__() -> collections.abc.AsyncIterator[datafusion.record_batch.RecordBatch] + + Return an async iterator over this DataFrame's record batches. + + We're using __aiter__ because we support Python < 3.10 where aiter() is not + available. + + + + .. py:method:: __arrow_c_stream__(requested_schema: object | None = None) -> object + + Export the DataFrame as an Arrow C Stream. + + The DataFrame is executed using DataFusion's streaming APIs and exposed via + Arrow's C Stream interface. Record batches are produced incrementally, so the + full result set is never materialized in memory. 
+ + When ``requested_schema`` is provided, DataFusion applies only simple + projections such as selecting a subset of existing columns or reordering + them. Column renaming, computed expressions, or type coercion are not + supported through this interface. + + :param requested_schema: Either a :py:class:`pyarrow.Schema` or an Arrow C + Schema capsule (``PyCapsule``) produced by + ``schema._export_to_c_capsule()``. The DataFrame will attempt to + align its output with the fields and order specified by this schema. + + :returns: Arrow ``PyCapsule`` object representing an ``ArrowArrayStream``. + + For practical usage patterns, see the Apache Arrow streaming + documentation: https://arrow.apache.org/docs/python/ipc.html#streaming. + + For details on DataFusion's Arrow integration and DataFrame streaming, + see the user guide (user-guide/io/arrow and user-guide/dataframe/index). + + .. rubric:: Notes + + The Arrow C Data Interface PyCapsule details are documented by Apache + Arrow and can be found at: + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + + + .. py:method:: __getitem__(key: str | list[str]) -> DataFrame + + Return a new :py:class:`DataFrame` with the specified column or columns. + + :param key: Column name or list of column names to select. + + :returns: DataFrame with the specified column or columns. + + + + .. py:method:: __iter__() -> collections.abc.Iterator[datafusion.record_batch.RecordBatch] + + Return an iterator over this DataFrame's record batches. + + + + .. py:method:: __repr__() -> str + + Return a string representation of the DataFrame. + + :returns: String representation of the DataFrame. + + + + .. py:method:: _repr_html_() -> str + + + .. py:method:: aggregate(group_by: collections.abc.Sequence[datafusion.expr.Expr | str] | datafusion.expr.Expr | str, aggs: collections.abc.Sequence[datafusion.expr.Expr] | datafusion.expr.Expr) -> DataFrame + + Aggregates the rows of the current DataFrame. 
+ + :param group_by: Sequence of expressions or column names to group by. + :param aggs: Sequence of expressions to aggregate. + + :returns: DataFrame after aggregation. + + + + .. py:method:: cache() -> DataFrame + + Cache the DataFrame as a memory table. + + :returns: Cached DataFrame. + + + + .. py:method:: cast(mapping: dict[str, pyarrow.DataType[Any]]) -> DataFrame + + Cast one or more columns to a different data type. + + :param mapping: Mapped with column as key and column dtype as value. + + :returns: DataFrame after casting columns + + + + .. py:method:: collect() -> list[pyarrow.RecordBatch] + + Execute this :py:class:`DataFrame` and collect results into memory. + + Prior to calling ``collect``, modifying a DataFrame simply updates a plan + (no actual computation is performed). Calling ``collect`` triggers the + computation. + + :returns: List of :py:class:`pyarrow.RecordBatch` collected from the DataFrame. + + + + .. py:method:: collect_column(column_name: str) -> pyarrow.Array | pyarrow.ChunkedArray + + Executes this :py:class:`DataFrame` for a single column. + + + + .. py:method:: collect_partitioned() -> list[list[pyarrow.RecordBatch]] + + Execute this DataFrame and collect all partitioned results. + + This operation returns :py:class:`pyarrow.RecordBatch` maintaining the input + partitioning. + + :returns: + + List of list of :py:class:`RecordBatch` collected from the + DataFrame. + + + + .. py:method:: count() -> int + + Return the total number of rows in this :py:class:`DataFrame`. + + Note that this method will actually run a plan to calculate the + count, which may be slow for large or complicated DataFrames. + + :returns: Number of rows in the DataFrame. + + + + .. py:method:: default_str_repr(batches: list[pyarrow.RecordBatch], schema: pyarrow.Schema, has_more: bool, table_uuid: str | None = None) -> str + :staticmethod: + + + Return the default string representation of a DataFrame. 
+ + This method is used by the default formatter and implemented in Rust for + performance reasons. + + + + .. py:method:: describe() -> DataFrame + + Return the statistics for this DataFrame. + + Only summarizes numeric datatypes at the moment and returns nulls + for non-numeric datatypes. + + The output format is modeled after pandas. + + :returns: A summary DataFrame containing statistics. + + + + .. py:method:: distinct() -> DataFrame + + Return a new :py:class:`DataFrame` with all duplicated rows removed. + + :returns: DataFrame after removing duplicates. + + + + .. py:method:: drop(*columns: str) -> DataFrame + + Drop an arbitrary number of columns. + + Column names are case-sensitive and do not require double quotes like + other operations such as `select`. Leading and trailing double quotes + are allowed and will be automatically stripped if present. + + :param columns: Column names to drop from the dataframe. Both ``column_name`` + and ``"column_name"`` are accepted. + + :returns: DataFrame with those columns removed in the projection. + + Example Usage:: + + df.drop('ID_For_Students') # Works + df.drop('"ID_For_Students"') # Also works (quotes stripped) + + + + .. py:method:: except_all(other: DataFrame) -> DataFrame + + Calculate the exception of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + :param other: DataFrame to calculate exception with. + + :returns: DataFrame after exception. + + + + .. py:method:: execute_stream() -> datafusion.record_batch.RecordBatchStream + + Executes this DataFrame and returns a stream over a single partition. + + :returns: Record Batch Stream over a single partition. + + + + .. py:method:: execute_stream_partitioned() -> list[datafusion.record_batch.RecordBatchStream] + + Executes this DataFrame and returns a stream for each partition. + + :returns: One record batch stream per partition. + + + + .. 
py:method:: execution_plan() -> datafusion.plan.ExecutionPlan + + Return the execution/physical plan. + + :returns: Execution plan. + + + + .. py:method:: explain(verbose: bool = False, analyze: bool = False) -> None + + Print an explanation of the DataFrame's plan so far. + + If ``analyze`` is specified, runs the plan and reports metrics. + + :param verbose: If ``True``, more details will be included. + :param analyze: If ``True``, the plan will run and metrics reported. + + + + .. py:method:: fill_null(value: Any, subset: list[str] | None = None) -> DataFrame + + Fill null values in specified columns with a value. + + :param value: Value to replace nulls with. Will be cast to match column type. + :param subset: Optional list of column names to fill. If None, fills all columns. + + :returns: DataFrame with null values replaced where type casting is possible + + .. rubric:: Examples + + >>> df = df.fill_null(0) # Fill all nulls with 0 where possible + >>> # Fill nulls in specific string columns + >>> df = df.fill_null("missing", subset=["name", "category"]) + + .. rubric:: Notes + + - Only fills nulls in columns where the value can be cast to the column type + - For columns where casting fails, the original column is kept unchanged + - For columns not in subset, the original column is kept unchanged + + + + .. py:method:: filter(*predicates: datafusion.expr.Expr | str) -> DataFrame + + Return a DataFrame for which ``predicate`` evaluates to ``True``. + + Rows for which ``predicate`` evaluates to ``False`` or ``None`` are filtered + out. If more than one predicate is provided, these predicates will be + combined as a logical AND. Each ``predicate`` can be an + :class:`~datafusion.expr.Expr` created using helper functions such as + :func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression string + that will be parsed against the DataFrame schema. If more complex logic is + required, see the logical operations in :py:mod:`~datafusion.functions`. 
+ + Example:: + + from datafusion import col, lit + df.filter(col("a") > lit(1)) + df.filter("a > 1") + + :param predicates: Predicate expression(s) or SQL strings to filter the DataFrame. + + :returns: DataFrame after filtering. + + + + .. py:method:: head(n: int = 5) -> DataFrame + + Return a new :py:class:`DataFrame` with a limited number of rows. + + :param n: Number of rows to take from the head of the DataFrame. + + :returns: DataFrame after limiting. + + + + .. py:method:: intersect(other: DataFrame) -> DataFrame + + Calculate the intersection of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + :param other: DataFrame to intersect with. + + :returns: DataFrame after intersection. + + + + .. py:method:: into_view(temporary: bool = False) -> datafusion.catalog.Table + + Convert ``DataFrame`` into a :class:`~datafusion.Table`. + + .. rubric:: Examples + + >>> from datafusion import SessionContext + >>> ctx = SessionContext() + >>> df = ctx.sql("SELECT 1 AS value") + >>> view = df.into_view() + >>> ctx.register_table("values_view", view) + >>> df.collect() # The DataFrame is still usable + >>> ctx.sql("SELECT value FROM values_view").collect() + + + + .. 
py:method:: join(right: DataFrame, on: str | collections.abc.Sequence[str], how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, left_on: None = None, right_on: None = None, join_keys: None = None, coalesce_duplicate_keys: bool = True) -> DataFrame + join(right: DataFrame, on: None = None, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, left_on: str | collections.abc.Sequence[str], right_on: str | collections.abc.Sequence[str], join_keys: tuple[list[str], list[str]] | None = None, coalesce_duplicate_keys: bool = True) -> DataFrame + join(right: DataFrame, on: None = None, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, join_keys: tuple[list[str], list[str]], left_on: None = None, right_on: None = None, coalesce_duplicate_keys: bool = True) -> DataFrame + + Join this :py:class:`DataFrame` with another :py:class:`DataFrame`. + + `on` has to be provided or both `left_on` and `right_on` in conjunction. + + :param right: Other DataFrame to join with. + :param on: Column names to join on in both dataframes. + :param how: Type of join to perform. Supported types are "inner", "left", + "right", "full", "semi", "anti". + :param left_on: Join column of the left dataframe. + :param right_on: Join column of the right dataframe. + :param coalesce_duplicate_keys: When True, coalesce the columns + from the right DataFrame and left DataFrame + that have identical names in the ``on`` fields. + :param join_keys: Tuple of two lists of column names to join on. [Deprecated] + + :returns: DataFrame after join. + + + + .. py:method:: join_on(right: DataFrame, *on_exprs: datafusion.expr.Expr, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner') -> DataFrame + + Join two :py:class:`DataFrame` using the specified expressions. + + Join predicates must be :class:`~datafusion.expr.Expr` objects, typically + built with :func:`datafusion.col`. 
On expressions are used to support + inequality predicates. Equality predicates are correctly optimized. + + Example:: + + from datafusion import col + df.join_on(other_df, col("id") == col("other_id")) + + :param right: Other DataFrame to join with. + :param on_exprs: Single or multiple (in)equality predicates. + :param how: Type of join to perform. Supported types are "inner", "left", + "right", "full", "semi", "anti". + + :returns: DataFrame after join. + + + + .. py:method:: limit(count: int, offset: int = 0) -> DataFrame + + Return a new :py:class:`DataFrame` with a limited number of rows. + + :param count: Number of rows to limit the DataFrame to. + :param offset: Number of rows to skip. + + :returns: DataFrame after limiting. + + + + .. py:method:: logical_plan() -> datafusion.plan.LogicalPlan + + Return the unoptimized ``LogicalPlan``. + + :returns: Unoptimized logical plan. + + + + .. py:method:: optimized_logical_plan() -> datafusion.plan.LogicalPlan + + Return the optimized ``LogicalPlan``. + + :returns: Optimized logical plan. + + + + .. py:method:: parse_sql_expr(expr: str) -> datafusion.expr.Expr + + Create a logical expression from SQL query text. + + The expression is created and processed against the current schema. + + Example:: + + from datafusion import col, lit + df.parse_sql_expr("a > 1") + + should produce: + + col("a") > lit(1) + + :param expr: Expression string to be converted to a DataFusion expression. + + :returns: Logical expression. + + + + .. py:method:: repartition(num: int) -> DataFrame + + Repartition a DataFrame into ``num`` partitions. + + Batches are allocated using a round-robin algorithm. + + :param num: Number of partitions to repartition the DataFrame into. + + :returns: Repartitioned DataFrame. + + + + .. py:method:: repartition_by_hash(*exprs: datafusion.expr.Expr | str, num: int) -> DataFrame + + Repartition a DataFrame using a hash partitioning scheme. 
+ + :param exprs: Expressions or a SQL expression string to evaluate + and perform hashing on. + :param num: Number of partitions to repartition the DataFrame into. + + :returns: Repartitioned DataFrame. + + + + .. py:method:: schema() -> pyarrow.Schema + + Return the :py:class:`pyarrow.Schema` of this DataFrame. + + The output schema contains information on the name, data type, and + nullability for each column. + + :returns: Describing schema of the DataFrame + + + + .. py:method:: select(*exprs: datafusion.expr.Expr | str) -> DataFrame + + Project arbitrary expressions into a new :py:class:`DataFrame`. + + :param exprs: Either column names or :py:class:`~datafusion.expr.Expr` to select. + + :returns: DataFrame after projection. It has one column for each expression. + + Example usage: + + The following example will return 3 columns from the original dataframe. + The first two columns will be the original column ``a`` and ``b`` since the + string "a" is assumed to refer to column selection. Also a duplicate of + column ``a`` will be returned with the column name ``alternate_a``:: + + df = df.select("a", col("b"), col("a").alias("alternate_a")) + + + + + .. py:method:: select_columns(*args: str) -> DataFrame + + Filter the DataFrame by columns. + + :returns: DataFrame only containing the specified columns. + + + + .. py:method:: select_exprs(*args: str) -> DataFrame + + Project arbitrary list of expression strings into a new DataFrame. + + This method will parse string expressions into logical plan expressions. + The output DataFrame has one column for each expression. + + :returns: DataFrame only containing the specified columns. + + + + .. py:method:: show(num: int = 20) -> None + + Execute the DataFrame and print the result to the console. + + :param num: Number of lines to show. + + + + .. py:method:: sort(*exprs: datafusion.expr.SortKey) -> DataFrame + + Sort the DataFrame by the specified sorting expressions or column names. 
+ + Note that any expression can be turned into a sort expression by + calling its ``sort`` method. + + :param exprs: Sort expressions or column names, applied in order. + + :returns: DataFrame after sorting. + + + + .. py:method:: tail(n: int = 5) -> DataFrame + + Return a new :py:class:`DataFrame` with a limited number of rows. + + Be aware this could be potentially expensive since the row size needs to be + determined of the dataframe. This is done by collecting it. + + :param n: Number of rows to take from the tail of the DataFrame. + + :returns: DataFrame after limiting. + + + + .. py:method:: to_arrow_table() -> pyarrow.Table + + Execute the :py:class:`DataFrame` and convert it into an Arrow Table. + + :returns: Arrow Table. + + + + .. py:method:: to_pandas() -> pandas.DataFrame + + Execute the :py:class:`DataFrame` and convert it into a Pandas DataFrame. + + :returns: Pandas DataFrame. + + + + .. py:method:: to_polars() -> polars.DataFrame + + Execute the :py:class:`DataFrame` and convert it into a Polars DataFrame. + + :returns: Polars DataFrame. + + + + .. py:method:: to_pydict() -> dict[str, list[Any]] + + Execute the :py:class:`DataFrame` and convert it into a dictionary of lists. + + :returns: Dictionary of lists. + + + + .. py:method:: to_pylist() -> list[dict[str, Any]] + + Execute the :py:class:`DataFrame` and convert it into a list of dictionaries. + + :returns: List of dictionaries. + + + + .. py:method:: transform(func: collections.abc.Callable[Ellipsis, DataFrame], *args: Any) -> DataFrame + + Apply a function to the current DataFrame which returns another DataFrame. + + This is useful for chaining together multiple functions. 
For example:: + + def add_3(df: DataFrame) -> DataFrame: + return df.with_column("modified", lit(3)) + + def within_limit(df: DataFrame, limit: int) -> DataFrame: + return df.filter(col("a") < lit(limit)).distinct() + + df = df.transform(add_3).transform(within_limit, 4) + + :param func: A callable function that takes a DataFrame as its first argument. + :param args: Zero or more arguments to pass to `func`. + + :returns: DataFrame after applying `func` to the original DataFrame. + :rtype: DataFrame + + + + .. py:method:: union(other: DataFrame, distinct: bool = False) -> DataFrame + + Calculate the union of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + :param other: DataFrame to union with. + :param distinct: If ``True``, duplicate rows will be removed. + + :returns: DataFrame after union. + + + + .. py:method:: union_distinct(other: DataFrame) -> DataFrame + + Calculate the distinct union of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + Any duplicate rows are discarded. + + :param other: DataFrame to union with. + + :returns: DataFrame after union. + + + + .. py:method:: unnest_columns(*columns: str, preserve_nulls: bool = True) -> DataFrame + + Expand columns of arrays into a single row per array element. + + :param columns: Column names to perform unnest operation on. + :param preserve_nulls: If False, rows with null entries will not be + returned. + + :returns: A DataFrame with the columns expanded. + + + + .. py:method:: with_column(name: str, expr: datafusion.expr.Expr | str) -> DataFrame + + Add an additional column to the DataFrame. + + The ``expr`` must be an :class:`~datafusion.expr.Expr` constructed with + :func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression + string that will be parsed against the DataFrame schema. 
+ + Example:: + + from datafusion import col, lit + df.with_column("b", col("a") + lit(1)) + + :param name: Name of the column to add. + :param expr: Expression to compute the column. + + :returns: DataFrame with the new column. + + + + .. py:method:: with_column_renamed(old_name: str, new_name: str) -> DataFrame + + Rename one column by applying a new projection. + + This is a no-op if the column to be renamed does not exist. + + The method supports case-sensitive renaming by wrapping the column name + in one of the following symbols (" or ' or \`). + + :param old_name: Old column name. + :param new_name: New column name. + + :returns: DataFrame with the column renamed. + + + + .. py:method:: with_columns(*exprs: datafusion.expr.Expr | str | collections.abc.Iterable[datafusion.expr.Expr | str], **named_exprs: datafusion.expr.Expr | str) -> DataFrame + + Add columns to the DataFrame. + + Columns may be given as expressions, iterables of expressions, string SQL expressions, + or named expressions. + All expressions must be :class:`~datafusion.expr.Expr` objects created via + :func:`datafusion.col` or :func:`datafusion.lit`, or SQL expression strings. + To pass named expressions use the form ``name=Expr``. + + Example usage: The following will add 4 columns labeled ``a``, ``b``, ``c``, + and ``d``:: + + from datafusion import col, lit + df = df.with_columns( + col("x").alias("a"), + [lit(1).alias("b"), col("y").alias("c")], + d=lit(3) + ) + + Equivalent example using just SQL strings: + + df = df.with_columns( + "x as a", + ["1 as b", "y as c"], + d="3" + ) + + :param exprs: Either a single expression, an iterable of expressions to add or + SQL expression strings. + :param named_exprs: Named expressions in the form of ``name=expr``. + + :returns: DataFrame with the new columns added. + + + + .. 
py:method:: write_csv(path: str | pathlib.Path, with_header: bool = False, write_options: DataFrameWriteOptions | None = None) -> None + + Execute the :py:class:`DataFrame` and write the results to a CSV file. + + :param path: Path of the CSV file to write. + :param with_header: If true, output the CSV header row. + :param write_options: Options that impact how the DataFrame is written. + + + + .. py:method:: write_json(path: str | pathlib.Path, write_options: DataFrameWriteOptions | None = None) -> None + + Execute the :py:class:`DataFrame` and write the results to a JSON file. + + :param path: Path of the JSON file to write. + :param write_options: Options that impact how the DataFrame is written. + + + + .. py:method:: write_parquet(path: str | pathlib.Path, compression: str, compression_level: int | None = None, write_options: DataFrameWriteOptions | None = None) -> None + write_parquet(path: str | pathlib.Path, compression: Compression = Compression.ZSTD, compression_level: int | None = None, write_options: DataFrameWriteOptions | None = None) -> None + write_parquet(path: str | pathlib.Path, compression: ParquetWriterOptions, compression_level: None = None, write_options: DataFrameWriteOptions | None = None) -> None + + Execute the :py:class:`DataFrame` and write the results to a Parquet file. + + Available compression types are: + + - "uncompressed": No compression. + - "snappy": Snappy compression. + - "gzip": Gzip compression. + - "brotli": Brotli compression. + - "lz4": LZ4 compression. + - "lz4_raw": LZ4_RAW compression. + - "zstd": Zstandard compression. + + LZO compression is not yet implemented in arrow-rs and is therefore + excluded. + + :param path: Path of the Parquet file to write. + :param compression: Compression type to use. Default is "ZSTD". + :param compression_level: Compression level to use. For ZSTD, the + recommended range is 1 to 22, with the default being 4. Higher levels + provide better compression but slower speed. 
+ :param write_options: Options that impact how the DataFrame is written. + + + + .. py:method:: write_parquet_with_options(path: str | pathlib.Path, options: ParquetWriterOptions, write_options: DataFrameWriteOptions | None = None) -> None + + Execute the :py:class:`DataFrame` and write the results to a Parquet file. + + Allows advanced writer options to be set with `ParquetWriterOptions`. + + :param path: Path of the Parquet file to write. + :param options: Sets the writer parquet options (see `ParquetWriterOptions`). + :param write_options: Options that impact how the DataFrame is written. + + + + .. py:method:: write_table(table_name: str, write_options: DataFrameWriteOptions | None = None) -> None + + Execute the :py:class:`DataFrame` and write the results to a table. + + The table must be registered with the session to perform this operation. + Not all table providers support writing operations. See the individual + implementations for details. + + + + .. py:attribute:: df + + +.. py:class:: DataFrameWriteOptions(insert_operation: InsertOp | None = None, single_file_output: bool = False, partition_by: str | collections.abc.Sequence[str] | None = None, sort_by: datafusion.expr.Expr | datafusion.expr.SortExpr | collections.abc.Sequence[datafusion.expr.Expr] | collections.abc.Sequence[datafusion.expr.SortExpr] | None = None) + + Writer options for DataFrame. + + There is no guarantee the table provider supports all writer options. + See the individual implementation and documentation for details. + + Instantiate writer options for DataFrame. + + + .. py:attribute:: _raw_write_options + + +.. py:class:: InsertOp + + Bases: :py:obj:`enum.Enum` + + + Insert operation mode. + + These modes are used by the table writing feature to define how record + batches should be written to a table. + + + .. py:attribute:: APPEND + + Appends new rows to the existing table without modifying any existing rows. + + + .. 
py:attribute:: OVERWRITE + + Overwrites all existing rows in the table with the new rows. + + + .. py:attribute:: REPLACE + + Replace existing rows that collide with the inserted rows. + + Replacement is typically based on a unique key or primary key. + + +.. py:class:: ParquetColumnOptions(encoding: str | None = None, dictionary_enabled: bool | None = None, compression: str | None = None, statistics_enabled: str | None = None, bloom_filter_enabled: bool | None = None, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None) + + Parquet options for individual columns. + + Contains the available options that can be applied for an individual Parquet column, + replacing the global options in ``ParquetWriterOptions``. + + Initialize the ParquetColumnOptions. + + :param encoding: Sets encoding for the column path. Valid values are: ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. These values are not case-sensitive. If + ``None``, uses the default parquet options. + :param dictionary_enabled: Sets if dictionary encoding is enabled for the column + path. If ``None``, uses the default parquet options. + :param compression: Sets default parquet compression codec for the column path. + Valid values are ``uncompressed``, ``snappy``, ``gzip(level)``, ``lzo``, + ``brotli(level)``, ``lz4``, ``zstd(level)``, and ``lz4_raw``. These + values are not case-sensitive. If ``None``, uses the default parquet + options. + :param statistics_enabled: Sets if statistics are enabled for the column. Valid + values are: ``none``, ``chunk``, and ``page``. These values are not + case-sensitive. If ``None``, uses the default parquet options. + :param bloom_filter_enabled: Sets if bloom filter is enabled for the column path. + If ``None``, uses the default parquet options. 
+ :param bloom_filter_fpp: Sets bloom filter false positive probability for the + column path. If ``None``, uses the default parquet options. + :param bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet options. + + + .. py:attribute:: bloom_filter_enabled + :value: None + + + + .. py:attribute:: bloom_filter_fpp + :value: None + + + + .. py:attribute:: bloom_filter_ndv + :value: None + + + + .. py:attribute:: compression + :value: None + + + + .. py:attribute:: dictionary_enabled + :value: None + + + + .. py:attribute:: encoding + :value: None + + + + .. py:attribute:: statistics_enabled + :value: None + + + +.. py:class:: ParquetWriterOptions(data_pagesize_limit: int = 1024 * 1024, write_batch_size: int = 1024, writer_version: str = '1.0', skip_arrow_metadata: bool = False, compression: str | None = 'zstd(3)', compression_level: int | None = None, dictionary_enabled: bool | None = True, dictionary_page_size_limit: int = 1024 * 1024, statistics_enabled: str | None = 'page', max_row_group_size: int = 1024 * 1024, created_by: str = 'datafusion-python', column_index_truncate_length: int | None = 64, statistics_truncate_length: int | None = None, data_page_row_count_limit: int = 20000, encoding: str | None = None, bloom_filter_on_write: bool = False, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None, allow_single_file_parallelism: bool = True, maximum_parallel_row_group_writers: int = 1, maximum_buffered_record_batches_per_stream: int = 2, column_specific_options: dict[str, ParquetColumnOptions] | None = None) + + Advanced parquet writer options. + + Allows setting the writer options that apply to the entire file. Some options can + also be set on a column-by-column basis, with the field ``column_specific_options`` + (see ``ParquetColumnOptions``). + + Initialize the ParquetWriterOptions. + + :param data_pagesize_limit: Sets best effort maximum size of data page in bytes. 
+ :param write_batch_size: Sets write_batch_size in bytes. + :param writer_version: Sets parquet writer version. Valid values are ``1.0`` and + ``2.0``. + :param skip_arrow_metadata: Skip encoding the embedded arrow metadata in the + KV_meta. + :param compression: Compression type to use. Default is ``zstd(3)``. + Available compression types are: + + - ``uncompressed``: No compression. + - ``snappy``: Snappy compression. + - ``gzip(n)``: Gzip compression with level n. + - ``brotli(n)``: Brotli compression with level n. + - ``lz4``: LZ4 compression. + - ``lz4_raw``: LZ4_RAW compression. + - ``zstd(n)``: Zstandard compression with level n. + :param compression_level: Compression level to set. + :param dictionary_enabled: Sets if dictionary encoding is enabled. If ``None``, + uses the default parquet writer setting. + :param dictionary_page_size_limit: Sets best effort maximum dictionary page size, + in bytes. + :param statistics_enabled: Sets if statistics are enabled for any column. Valid + values are ``none``, ``chunk``, and ``page``. If ``None``, uses the + default parquet writer setting. + :param max_row_group_size: Target maximum number of rows in each row group + (defaults to 1M rows). Writing larger row groups requires more memory + to write, but can get better compression and be faster to read. + :param created_by: Sets "created by" property. + :param column_index_truncate_length: Sets column index truncate length. + :param statistics_truncate_length: Sets statistics truncate length. If ``None``, + uses the default parquet writer setting. + :param data_page_row_count_limit: Sets best effort maximum number of rows in a data + page. + :param encoding: Sets default encoding for any column. Valid values are ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. If ``None``, uses the default parquet writer + setting. 
+ :param bloom_filter_on_write: Write bloom filters for all columns when creating + parquet files. + :param bloom_filter_fpp: Sets bloom filter false positive probability. If ``None``, + uses the default parquet writer setting + :param bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet writer setting. + :param allow_single_file_parallelism: Controls whether DataFusion will attempt to + speed up writing parquet files by serializing them in parallel. Each + column in each row group in each output file are serialized in parallel + leveraging a maximum possible core count of + ``n_files * n_row_groups * n_columns``. + :param maximum_parallel_row_group_writers: By default parallel parquet writer is + tuned for minimum memory usage in a streaming execution plan. You may + see a performance benefit when writing large parquet files by increasing + ``maximum_parallel_row_group_writers`` and + ``maximum_buffered_record_batches_per_stream`` if your system has idle + cores and can tolerate additional memory usage. Boosting these values is + likely worthwhile when writing out already in-memory data, such as from + a cached data frame. + :param maximum_buffered_record_batches_per_stream: See + ``maximum_parallel_row_group_writers``. + :param column_specific_options: Overrides options for specific columns. If a column + is not a part of this dictionary, it will use the parameters provided + here. + + + .. py:attribute:: allow_single_file_parallelism + :value: True + + + + .. py:attribute:: bloom_filter_fpp + :value: None + + + + .. py:attribute:: bloom_filter_ndv + :value: None + + + + .. py:attribute:: bloom_filter_on_write + :value: False + + + + .. py:attribute:: column_index_truncate_length + :value: 64 + + + + .. py:attribute:: column_specific_options + :value: None + + + + .. py:attribute:: created_by + :value: 'datafusion-python' + + + + .. py:attribute:: data_page_row_count_limit + :value: 20000 + + + + .. 
py:attribute:: data_pagesize_limit + :value: 1048576 + + + + .. py:attribute:: dictionary_enabled + :value: True + + + + .. py:attribute:: dictionary_page_size_limit + :value: 1048576 + + + + .. py:attribute:: encoding + :value: None + + + + .. py:attribute:: max_row_group_size + :value: 1048576 + + + + .. py:attribute:: maximum_buffered_record_batches_per_stream + :value: 2 + + + + .. py:attribute:: maximum_parallel_row_group_writers + :value: 1 + + + + .. py:attribute:: skip_arrow_metadata + :value: False + + + + .. py:attribute:: statistics_enabled + :value: 'page' + + + + .. py:attribute:: statistics_truncate_length + :value: None + + + + .. py:attribute:: write_batch_size + :value: 1024 + + + + .. py:attribute:: writer_version + :value: '1.0' + + + diff --git a/_sources/autoapi/datafusion/dataframe_formatter/index.rst.txt b/_sources/autoapi/datafusion/dataframe_formatter/index.rst.txt new file mode 100644 index 000000000..243627a96 --- /dev/null +++ b/_sources/autoapi/datafusion/dataframe_formatter/index.rst.txt @@ -0,0 +1,530 @@ +datafusion.dataframe_formatter +============================== + +.. py:module:: datafusion.dataframe_formatter + +.. autoapi-nested-parse:: + + HTML formatting utilities for DataFusion DataFrames. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.dataframe_formatter.CellFormatter + datafusion.dataframe_formatter.DataFrameHtmlFormatter + datafusion.dataframe_formatter.DefaultStyleProvider + datafusion.dataframe_formatter.FormatterManager + datafusion.dataframe_formatter.StyleProvider + + +Functions +--------- + +.. 
autoapisummary:: + + datafusion.dataframe_formatter._refresh_formatter_reference + datafusion.dataframe_formatter._validate_bool + datafusion.dataframe_formatter._validate_formatter_parameters + datafusion.dataframe_formatter._validate_positive_int + datafusion.dataframe_formatter.configure_formatter + datafusion.dataframe_formatter.get_formatter + datafusion.dataframe_formatter.reset_formatter + datafusion.dataframe_formatter.set_formatter + + +Module Contents +--------------- + +.. py:class:: CellFormatter + + Bases: :py:obj:`Protocol` + + + Protocol for cell value formatters. + + + .. py:method:: __call__(value: Any) -> str + + Format a cell value to string representation. + + + +.. py:class:: DataFrameHtmlFormatter(max_cell_length: int = 25, max_width: int = 1000, max_height: int = 300, max_memory_bytes: int = 2 * 1024 * 1024, min_rows: int = 10, max_rows: int | None = None, repr_rows: int | None = None, enable_cell_expansion: bool = True, custom_css: str | None = None, show_truncation_message: bool = True, style_provider: StyleProvider | None = None, use_shared_styles: bool = True) + + Configurable HTML formatter for DataFusion DataFrames. + + This class handles the HTML rendering of DataFrames for display in + Jupyter notebooks and other rich display contexts. + + This class supports extension through composition. 
Key extension points: + - Provide a custom StyleProvider for styling cells and headers + - Register custom formatters for specific types + - Provide custom cell builders for specialized cell rendering + + :param max_cell_length: Maximum characters to display in a cell before truncation + :param max_width: Maximum width of the HTML table in pixels + :param max_height: Maximum height of the HTML table in pixels + :param max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) + :param min_rows: Minimum number of rows to display (must be <= max_rows) + :param max_rows: Maximum number of rows to display in repr output + :param repr_rows: Deprecated alias for max_rows + :param enable_cell_expansion: Whether to add expand/collapse buttons for long cell + values + :param custom_css: Additional CSS to include in the HTML output + :param show_truncation_message: Whether to display a message when data is truncated + :param style_provider: Custom provider for cell and header styles + :param use_shared_styles: Whether to load styles and scripts only once per notebook + session + + Initialize the HTML formatter. + + :param max_cell_length: Maximum length of cell content before truncation. + :param max_width: Maximum width of the displayed table in pixels. + :param max_height: Maximum height of the displayed table in pixels. + :param max_memory_bytes: Maximum memory in bytes for rendered data. Helps prevent performance + issues with large datasets. + :param min_rows: Minimum number of rows to display even if memory limit is reached. + Must not exceed ``max_rows``. + :param max_rows: Maximum number of rows to display. Takes precedence over memory limits + when fewer rows are requested. + :param repr_rows: Deprecated alias for ``max_rows``. Use ``max_rows`` instead. + :param enable_cell_expansion: Whether to allow cells to expand when clicked. + :param custom_css: Custom CSS to apply to the HTML table. 
+ :param show_truncation_message: Whether to show a message indicating that content has been truncated. + :param style_provider: Provider of CSS styles for the HTML table. If None, DefaultStyleProvider + is used. + :param use_shared_styles: Whether to use shared styles across multiple tables. This improves + performance when displaying many DataFrames in a single notebook. + + + :raises ValueError: If max_cell_length, max_width, max_height, max_memory_bytes, + min_rows or max_rows is not a positive integer, or if min_rows + exceeds max_rows. + :raises TypeError: If enable_cell_expansion, show_truncation_message, or use_shared_styles is + not a boolean, or if custom_css is provided but is not a string, or if + style_provider is provided but does not implement the StyleProvider + protocol. + + + .. py:method:: _build_expandable_cell(formatted_value: str, row_count: int, col_idx: int, table_uuid: str) -> str + + Build an expandable cell for long content. + + + + .. py:method:: _build_html_footer(has_more: bool) -> list[str] + + Build the HTML footer with JavaScript and messages. + + + + .. py:method:: _build_html_header() -> list[str] + + Build the HTML header with CSS styles. + + + + .. py:method:: _build_regular_cell(formatted_value: str) -> str + + Build a regular table cell. + + + + .. py:method:: _build_table_body(batches: list, table_uuid: str) -> list[str] + + Build the HTML table body with data rows. + + + + .. py:method:: _build_table_container_start() -> list[str] + + Build the opening tags for the table container. + + + + .. py:method:: _build_table_header(schema: Any) -> list[str] + + Build the HTML table header with column names. + + + + .. py:method:: _format_cell_value(value: Any) -> str + + Format a cell value for display. + + Uses registered type formatters if available. + + :param value: The cell value to format + + :returns: Formatted cell value as string + + + + .. 
py:method:: _get_cell_value(column: Any, row_idx: int) -> Any + + Extract a cell value from a column. + + :param column: Arrow array + :param row_idx: Row index + + :returns: The raw cell value + + + + .. py:method:: _get_default_css() -> str + + Get default CSS styles for the HTML table. + + + + .. py:method:: _get_javascript() -> str + + Get JavaScript code for interactive elements. + + + + .. py:method:: format_html(batches: list, schema: Any, has_more: bool = False, table_uuid: str | None = None) -> str + + Format record batches as HTML. + + This method is used by DataFrame's _repr_html_ implementation and can be + called directly when custom HTML rendering is needed. + + :param batches: List of Arrow RecordBatch objects + :param schema: Arrow Schema object + :param has_more: Whether there are more batches not shown + :param table_uuid: Unique ID for the table, used for JavaScript interactions + + :returns: HTML string representation of the data + + :raises TypeError: If schema is invalid and no batches are provided + + + + .. py:method:: format_str(batches: list, schema: Any, has_more: bool = False, table_uuid: str | None = None) -> str + + Format record batches as a string. + + This method is used by DataFrame's __repr__ implementation and can be + called directly when string rendering is needed. + + :param batches: List of Arrow RecordBatch objects + :param schema: Arrow Schema object + :param has_more: Whether there are more batches not shown + :param table_uuid: Unique ID for the table, used for JavaScript interactions + + :returns: String representation of the data + + :raises TypeError: If schema is invalid and no batches are provided + + + + .. py:method:: register_formatter(type_class: type, formatter: CellFormatter) -> None + + Register a custom formatter for a specific data type. + + :param type_class: The type to register a formatter for + :param formatter: Function that takes a value of the given type and returns + a formatted string + + + + .. 
py:method:: set_custom_cell_builder(builder: collections.abc.Callable[[Any, int, int, str], str]) -> None + + Set a custom cell builder function. + + :param builder: Function that takes (value, row, col, table_id) and returns HTML + + + + .. py:method:: set_custom_header_builder(builder: collections.abc.Callable[[Any], str]) -> None + + Set a custom header builder function. + + :param builder: Function that takes a field and returns HTML + + + + .. py:attribute:: _custom_cell_builder + :type: collections.abc.Callable[[Any, int, int, str], str] | None + :value: None + + + + .. py:attribute:: _custom_header_builder + :type: collections.abc.Callable[[Any], str] | None + :value: None + + + + .. py:attribute:: _max_rows + :value: None + + + + .. py:attribute:: _type_formatters + :type: dict[type, CellFormatter] + + + .. py:attribute:: custom_css + :value: None + + + + .. py:attribute:: enable_cell_expansion + :value: True + + + + .. py:attribute:: max_cell_length + :value: 25 + + + + .. py:attribute:: max_height + :value: 300 + + + + .. py:attribute:: max_memory_bytes + :value: 2097152 + + + + .. py:property:: max_rows + :type: int + + + Get the maximum number of rows to display. + + :returns: The maximum number of rows to display in repr output + + + .. py:attribute:: max_width + :value: 1000 + + + + .. py:attribute:: min_rows + :value: 10 + + + + .. py:property:: repr_rows + :type: int + + + Get the maximum number of rows (deprecated name). + + .. deprecated:: + Use :attr:`max_rows` instead. This property is provided for + backward compatibility. + + :returns: The maximum number of rows to display + + + .. py:attribute:: show_truncation_message + :value: True + + + + .. py:attribute:: style_provider + + + .. py:attribute:: use_shared_styles + :value: True + + + +.. py:class:: DefaultStyleProvider + + Default implementation of StyleProvider. + + + .. py:method:: get_cell_style() -> str + + Get the CSS style for table cells. + + :returns: CSS style string + + + + .. 
py:method:: get_header_style() -> str + + Get the CSS style for header cells. + + :returns: CSS style string + + + +.. py:class:: FormatterManager + + Manager class for the global DataFrame HTML formatter instance. + + + .. py:method:: get_formatter() -> DataFrameHtmlFormatter + :classmethod: + + + Get the current global DataFrame HTML formatter. + + :returns: The global HTML formatter instance + + + + .. py:method:: set_formatter(formatter: DataFrameHtmlFormatter) -> None + :classmethod: + + + Set the global DataFrame HTML formatter. + + :param formatter: The formatter instance to use globally + + + + .. py:attribute:: _default_formatter + :type: DataFrameHtmlFormatter + + +.. py:class:: StyleProvider + + Bases: :py:obj:`Protocol` + + + Protocol for HTML style providers. + + + .. py:method:: get_cell_style() -> str + + Get the CSS style for table cells. + + + + .. py:method:: get_header_style() -> str + + Get the CSS style for header cells. + + + +.. py:function:: _refresh_formatter_reference() -> None + + Refresh formatter reference in any modules using it. + + This helps ensure that changes to the formatter are reflected in existing + DataFrames that might be caching the formatter reference. + + +.. py:function:: _validate_bool(value: Any, param_name: str) -> None + + Validate that a parameter is a boolean. + + :param value: The value to validate + :param param_name: Name of the parameter (used in error message) + + :raises TypeError: If the value is not a boolean + + +.. py:function:: _validate_formatter_parameters(max_cell_length: int, max_width: int, max_height: int, max_memory_bytes: int, min_rows: int, max_rows: int | None, repr_rows: int | None, enable_cell_expansion: bool, show_truncation_message: bool, use_shared_styles: bool, custom_css: str | None, style_provider: Any) -> int + + Validate all formatter parameters and return resolved max_rows value. 
+ + :param max_cell_length: Maximum cell length value to validate + :param max_width: Maximum width value to validate + :param max_height: Maximum height value to validate + :param max_memory_bytes: Maximum memory bytes value to validate + :param min_rows: Minimum rows to display value to validate + :param max_rows: Maximum rows value to validate (None means use default) + :param repr_rows: Deprecated repr_rows value to validate + :param enable_cell_expansion: Boolean expansion flag to validate + :param show_truncation_message: Boolean message flag to validate + :param use_shared_styles: Boolean styles flag to validate + :param custom_css: Custom CSS string to validate + :param style_provider: Style provider object to validate + + :returns: The resolved max_rows value after handling repr_rows deprecation + + :raises ValueError: If any numeric parameter is invalid or constraints are violated + :raises TypeError: If any parameter has invalid type + :raises DeprecationWarning: If repr_rows parameter is used + + +.. py:function:: _validate_positive_int(value: Any, param_name: str) -> None + + Validate that a parameter is a positive integer. + + :param value: The value to validate + :param param_name: Name of the parameter (used in error message) + + :raises ValueError: If the value is not a positive integer + + +.. py:function:: configure_formatter(**kwargs: Any) -> None + + Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + :param \*\*kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + :raises ValueError: If any invalid parameters are provided + + .. rubric:: Example + + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... max_height=500, + ... enable_cell_expansion=True, + ... 
use_shared_styles=True + ... ) + + +.. py:function:: get_formatter() -> DataFrameHtmlFormatter + + Get the current global DataFrame HTML formatter. + + This function is used by the DataFrame._repr_html_ implementation to access + the shared formatter instance. It can also be used directly when custom + HTML rendering is needed. + + :returns: The global HTML formatter instance + + .. rubric:: Example + + >>> from datafusion.html_formatter import get_formatter + >>> formatter = get_formatter() + >>> formatter.max_cell_length = 50 # Increase cell length + + +.. py:function:: reset_formatter() -> None + + Reset the global DataFrame HTML formatter to default settings. + + This function creates a new formatter with default configuration + and sets it as the global formatter for all DataFrames. + + .. rubric:: Example + + >>> from datafusion.html_formatter import reset_formatter + >>> reset_formatter() # Reset formatter to default settings + + +.. py:function:: set_formatter(formatter: DataFrameHtmlFormatter) -> None + + Set the global DataFrame HTML formatter. + + :param formatter: The formatter instance to use globally + + .. rubric:: Example + + >>> from datafusion.html_formatter import get_formatter, set_formatter + >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) + >>> set_formatter(custom_formatter) + + diff --git a/_sources/autoapi/datafusion/expr/index.rst.txt b/_sources/autoapi/datafusion/expr/index.rst.txt new file mode 100644 index 000000000..2bea04f40 --- /dev/null +++ b/_sources/autoapi/datafusion/expr/index.rst.txt @@ -0,0 +1,1356 @@ +datafusion.expr +=============== + +.. py:module:: datafusion.expr + +.. autoapi-nested-parse:: + + This module supports expressions, one of the core concepts in DataFusion. + + See :ref:`Expressions` in the online documentation for more details. + + + +Attributes +---------- + +.. 
autoapisummary:: + + datafusion.expr.Aggregate + datafusion.expr.AggregateFunction + datafusion.expr.Alias + datafusion.expr.Analyze + datafusion.expr.Between + datafusion.expr.BinaryExpr + datafusion.expr.Case + datafusion.expr.Cast + datafusion.expr.Column + datafusion.expr.CopyTo + datafusion.expr.CreateCatalog + datafusion.expr.CreateCatalogSchema + datafusion.expr.CreateExternalTable + datafusion.expr.CreateFunction + datafusion.expr.CreateFunctionBody + datafusion.expr.CreateIndex + datafusion.expr.CreateMemoryTable + datafusion.expr.CreateView + datafusion.expr.Deallocate + datafusion.expr.DescribeTable + datafusion.expr.Distinct + datafusion.expr.DmlStatement + datafusion.expr.DropCatalogSchema + datafusion.expr.DropFunction + datafusion.expr.DropTable + datafusion.expr.DropView + datafusion.expr.EXPR_TYPE_ERROR + datafusion.expr.EmptyRelation + datafusion.expr.Execute + datafusion.expr.Exists + datafusion.expr.Explain + datafusion.expr.Extension + datafusion.expr.FileType + datafusion.expr.Filter + datafusion.expr.GroupingSet + datafusion.expr.ILike + datafusion.expr.InList + datafusion.expr.InSubquery + datafusion.expr.IsFalse + datafusion.expr.IsNotFalse + datafusion.expr.IsNotNull + datafusion.expr.IsNotTrue + datafusion.expr.IsNotUnknown + datafusion.expr.IsNull + datafusion.expr.IsTrue + datafusion.expr.IsUnknown + datafusion.expr.Join + datafusion.expr.JoinConstraint + datafusion.expr.JoinType + datafusion.expr.Like + datafusion.expr.Limit + datafusion.expr.Literal + datafusion.expr.Negative + datafusion.expr.Not + datafusion.expr.OperateFunctionArg + datafusion.expr.Partitioning + datafusion.expr.Placeholder + datafusion.expr.Prepare + datafusion.expr.Projection + datafusion.expr.RecursiveQuery + datafusion.expr.Repartition + datafusion.expr.ScalarSubquery + datafusion.expr.ScalarVariable + datafusion.expr.SetVariable + datafusion.expr.SimilarTo + datafusion.expr.Sort + datafusion.expr.SortKey + datafusion.expr.Subquery + 
datafusion.expr.SubqueryAlias + datafusion.expr.TableScan + datafusion.expr.TransactionAccessMode + datafusion.expr.TransactionConclusion + datafusion.expr.TransactionEnd + datafusion.expr.TransactionIsolationLevel + datafusion.expr.TransactionStart + datafusion.expr.TryCast + datafusion.expr.Union + datafusion.expr.Unnest + datafusion.expr.UnnestExpr + datafusion.expr.Values + datafusion.expr.WindowExpr + + +Classes +------- + +.. autoapisummary:: + + datafusion.expr.CaseBuilder + datafusion.expr.Expr + datafusion.expr.SortExpr + datafusion.expr.Window + datafusion.expr.WindowFrame + datafusion.expr.WindowFrameBound + + +Functions +--------- + +.. autoapisummary:: + + datafusion.expr.ensure_expr + datafusion.expr.ensure_expr_list + + +Module Contents +--------------- + +.. py:class:: CaseBuilder(case_builder: datafusion._internal.expr.CaseBuilder) + + Builder class for constructing case statements. + + An example usage would be as follows:: + + import datafusion.functions as f + from datafusion import lit, col + df.select( + f.case(col("column_a")) + .when(lit(1), lit("One")) + .when(lit(2), lit("Two")) + .otherwise(lit("Unknown")) + ) + + Constructs a case builder. + + This is not typically called by the end user directly. See + :py:func:`datafusion.functions.case` instead. + + + .. py:method:: end() -> Expr + + Finish building a case statement. + + Any non-matching cases will end in a `null` value. + + + + .. py:method:: otherwise(else_expr: Expr) -> Expr + + Set a default value for the case statement. + + + + .. py:method:: when(when_expr: Expr, then_expr: Expr) -> CaseBuilder + + Add a case to match against. + + + + .. py:attribute:: case_builder + + +.. py:class:: Expr(expr: datafusion._internal.expr.RawExpr) + + Expression object. + + Expressions are one of the core concepts in DataFusion. See + :ref:`Expressions` in the online documentation for more information. + + This constructor should not be called by the end user. + + + .. 
py:method:: __add__(rhs: Any) -> Expr + + Addition operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __and__(rhs: Expr) -> Expr + + Logical AND. + + + + .. py:method:: __eq__(rhs: object) -> Expr + + Equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __ge__(rhs: Any) -> Expr + + Greater than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __getitem__(key: str | int) -> Expr + + Retrieve sub-object. + + If ``key`` is a string, returns the subfield of the struct. + If ``key`` is an integer, retrieves the element in the array. Note that the + element index begins at ``0``, unlike + :py:func:`~datafusion.functions.array_element` which begins at ``1``. + If ``key`` is a slice, returns an array that contains a slice of the + original array. Similar to integer indexing, this follows Python convention + where the index begins at ``0`` unlike + :py:func:`~datafusion.functions.array_slice` which begins at ``1``. + + + + .. py:method:: __gt__(rhs: Any) -> Expr + + Greater than. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __invert__() -> Expr + + Binary not (~). + + + + .. py:method:: __le__(rhs: Any) -> Expr + + Less than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __lt__(rhs: Any) -> Expr + + Less than. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __mod__(rhs: Any) -> Expr + + Modulo operator (%). + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __mul__(rhs: Any) -> Expr + + Multiplication operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __ne__(rhs: object) -> Expr + + Not equal to. 
+ + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __or__(rhs: Expr) -> Expr + + Logical OR. + + + + .. py:method:: __repr__() -> str + + Generate a string representation of this expression. + + + + .. py:method:: __richcmp__(other: Expr, op: int) -> Expr + + Comparison operator. + + + + .. py:method:: __sub__(rhs: Any) -> Expr + + Subtraction operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __truediv__(rhs: Any) -> Expr + + Division operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: abs() -> Expr + + Return the absolute value of a given number. + + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + + + + .. py:method:: acos() -> Expr + + Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. + + + + .. py:method:: acosh() -> Expr + + Returns inverse hyperbolic cosine. + + + + .. py:method:: alias(name: str, metadata: dict[str, str] | None = None) -> Expr + + Assign a name to the expression. + + :param name: The name to assign to the expression. + :param metadata: Optional metadata to attach to the expression. + + :returns: A new expression with the assigned name. + + + + .. py:method:: array_dims() -> Expr + + Returns an array of the array's dimensions. + + + + .. py:method:: array_distinct() -> Expr + + Returns distinct values from the array after removing duplicates. + + + + .. py:method:: array_empty() -> Expr + + Returns a boolean indicating whether the array is empty. + + + + .. py:method:: array_length() -> Expr + + Returns the length of the array. + + + + .. py:method:: array_ndims() -> Expr + + Returns the number of dimensions of the array. + + + + .. py:method:: array_pop_back() -> Expr + + Returns the array without the last element. 
+ + + + .. py:method:: array_pop_front() -> Expr + + Returns the array without the first element. + + + + .. py:method:: arrow_typeof() -> Expr + + Returns the Arrow type of the expression. + + + + .. py:method:: ascii() -> Expr + + Returns the numeric code of the first character of the argument. + + + + .. py:method:: asin() -> Expr + + Returns the arc sine or inverse sine of a number. + + + + .. py:method:: asinh() -> Expr + + Returns inverse hyperbolic sine. + + + + .. py:method:: atan() -> Expr + + Returns inverse tangent of a number. + + + + .. py:method:: atanh() -> Expr + + Returns inverse hyperbolic tangent. + + + + .. py:method:: between(low: Any, high: Any, negated: bool = False) -> Expr + + Returns ``True`` if this expression is between a given range. + + :param low: lower bound of the range (inclusive). + :param high: higher bound of the range (inclusive). + :param negated: negates whether the expression is between a given range + + + + .. py:method:: bit_length() -> Expr + + Returns the number of bits in the string argument. + + + + .. py:method:: btrim() -> Expr + + Removes all characters, spaces by default, from both sides of a string. + + + + .. py:method:: canonical_name() -> str + + Returns a complete string representation of this expression. + + + + .. py:method:: cardinality() -> Expr + + Returns the total number of elements in the array. + + + + .. py:method:: cast(to: pyarrow.DataType[Any] | type) -> Expr + + Cast to a new data type. + + + + .. py:method:: cbrt() -> Expr + + Returns the cube root of a number. + + + + .. py:method:: ceil() -> Expr + + Returns the nearest integer greater than or equal to argument. + + + + .. py:method:: char_length() -> Expr + + The number of characters in the ``string``. + + + + .. py:method:: character_length() -> Expr + + Returns the number of characters in the argument. + + + + .. py:method:: chr() -> Expr + + Converts the Unicode code point to a UTF8 character. + + + + .. 
py:method:: column(value: str) -> Expr + :staticmethod: + + + Creates a new expression representing a column. + + + + .. py:method:: column_name(plan: datafusion.plan.LogicalPlan) -> str + + Compute the output column name based on the provided logical plan. + + + + .. py:method:: cos() -> Expr + + Returns the cosine of the argument. + + + + .. py:method:: cosh() -> Expr + + Returns the hyperbolic cosine of the argument. + + + + .. py:method:: cot() -> Expr + + Returns the cotangent of the argument. + + + + .. py:method:: degrees() -> Expr + + Converts the argument from radians to degrees. + + + + .. py:method:: display_name() -> str + + Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + + + + .. py:method:: distinct() -> ExprFuncBuilder + + Only evaluate distinct values for an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: empty() -> Expr + + This is an alias for :py:func:`array_empty`. + + + + .. py:method:: exp() -> Expr + + Returns the exponential of the argument. + + + + .. py:method:: factorial() -> Expr + + Returns the factorial of the argument. + + + + .. py:method:: fill_nan(value: Any | Expr | None = None) -> Expr + + Fill NaN values with a provided value. + + + + .. py:method:: fill_null(value: Any | Expr | None = None) -> Expr + + Fill NULL values with a provided value. + + + + .. py:method:: filter(filter: Expr) -> ExprFuncBuilder + + Filter an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. 
py:method:: flatten() -> Expr + + Flattens an array of arrays into a single array. + + + + .. py:method:: floor() -> Expr + + Returns the nearest integer less than or equal to the argument. + + + + .. py:method:: from_unixtime() -> Expr + + Converts an integer to RFC3339 timestamp format string. + + + + .. py:method:: initcap() -> Expr + + Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the + remaining characters to lowercase. + + + + .. py:method:: is_not_null() -> Expr + + Returns ``True`` if this expression is not null. + + + + .. py:method:: is_null() -> Expr + + Returns ``True`` if this expression is null. + + + + .. py:method:: isnan() -> Expr + + Returns true if a given number is +NaN or -NaN otherwise returns false. + + + + .. py:method:: iszero() -> Expr + + Returns true if a given number is +0.0 or -0.0 otherwise returns false. + + + + .. py:method:: length() -> Expr + + The number of characters in the ``string``. + + + + .. py:method:: list_dims() -> Expr + + Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. + + + + .. py:method:: list_distinct() -> Expr + + Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. + + + + .. py:method:: list_length() -> Expr + + Returns the length of the array. + + This is an alias for :py:func:`array_length`. + + + + .. py:method:: list_ndims() -> Expr + + Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. + + + + .. py:method:: literal(value: Any) -> Expr + :staticmethod: + + + Creates a new expression representing a scalar value. + + ``value`` must be a valid PyArrow scalar value or easily castable to one. + + + + .. py:method:: literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr + :staticmethod: + + + Creates a new expression representing a scalar value with metadata. 
+ + :param value: A valid PyArrow scalar value or easily castable to one. + :param metadata: Metadata to attach to the expression. + + + + .. py:method:: ln() -> Expr + + Returns the natural logarithm (base e) of the argument. + + + + .. py:method:: log10() -> Expr + + Base 10 logarithm of the argument. + + + + .. py:method:: log2() -> Expr + + Base 2 logarithm of the argument. + + + + .. py:method:: lower() -> Expr + + Converts a string to lowercase. + + + + .. py:method:: ltrim() -> Expr + + Removes all characters, spaces by default, from the beginning of a string. + + + + .. py:method:: md5() -> Expr + + Computes an MD5 128-bit checksum for a string expression. + + + + .. py:method:: null_treatment(null_treatment: datafusion.common.NullTreatment) -> ExprFuncBuilder + + Set the treatment for ``null`` values for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: octet_length() -> Expr + + Returns the number of bytes of a string. + + + + .. py:method:: order_by(*exprs: Expr | SortExpr) -> ExprFuncBuilder + + Set the ordering for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: over(window: Window) -> Expr + + Turn an aggregate function into a window function. + + This function turns any aggregate function into a window function. With the + exception of ``partition_by``, how each of the parameters is used is determined + by the underlying aggregate function. + + :param window: Window definition + + + + .. 
py:method:: partition_by(*partition_by: Expr) -> ExprFuncBuilder + + Set the partitioning for a window function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: python_value() -> Any + + Extracts the Expr value into `Any`. + + This is only valid for literal expressions. + + :returns: Python object representing literal value of the expression. + + + + .. py:method:: radians() -> Expr + + Converts the argument from degrees to radians. + + + + .. py:method:: reverse() -> Expr + + Reverse the string argument. + + + + .. py:method:: rex_call_operands() -> list[Expr] + + Return the operands of the expression based on its variant type. + + Row expressions, Rex(s), operate on the concept of operands. Different + variants of Expressions, Expr(s), store those operands in different + data structures. This function examines the Expr variant and returns + the operands to the calling logic. + + + + .. py:method:: rex_call_operator() -> str + + Extracts the operator associated with a row expression type call. + + + + .. py:method:: rex_type() -> datafusion.common.RexType + + Return the Rex Type of this expression. + + A Rex (Row Expression) specifies a single row of data. That specification + could include user defined functions or types. RexType identifies the + row as one of the possible valid ``RexType``. + + + + .. py:method:: rtrim() -> Expr + + Removes all characters, spaces by default, from the end of a string. + + + + .. py:method:: schema_name() -> str + + Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + + + + .. py:method:: sha224() -> Expr + + Computes the SHA-224 hash of a binary string. + + + + .. py:method:: sha256() -> Expr + + Computes the SHA-256 hash of a binary string. + + + + ..
py:method:: sha384() -> Expr + + Computes the SHA-384 hash of a binary string. + + + + .. py:method:: sha512() -> Expr + + Computes the SHA-512 hash of a binary string. + + + + .. py:method:: signum() -> Expr + + Returns the sign of the argument (-1, 0, +1). + + + + .. py:method:: sin() -> Expr + + Returns the sine of the argument. + + + + .. py:method:: sinh() -> Expr + + Returns the hyperbolic sine of the argument. + + + + .. py:method:: sort(ascending: bool = True, nulls_first: bool = True) -> SortExpr + + Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. + + :param ascending: If true, sort in ascending order. + :param nulls_first: Return null values first. + + + + .. py:method:: sqrt() -> Expr + + Returns the square root of the argument. + + + + .. py:method:: string_literal(value: str) -> Expr + :staticmethod: + + + Creates a new expression representing a UTF8 literal value. + + It is different from `literal` because it is pa.string() instead of + pa.string_view() + + This is needed for cases where DataFusion is expecting a UTF8 instead of + UTF8View literal, like in: + https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 + + + + .. py:method:: tan() -> Expr + + Returns the tangent of the argument. + + + + .. py:method:: tanh() -> Expr + + Returns the hyperbolic tangent of the argument. + + + + .. py:method:: to_hex() -> Expr + + Converts an integer to a hexadecimal string. + + + + .. py:method:: to_variant() -> Any + + Convert this expression into a python object if possible. + + + + .. py:method:: trim() -> Expr + + Removes all characters, spaces by default, from both sides of a string. + + + + .. py:method:: types() -> datafusion.common.DataTypeMap + + Return the ``DataTypeMap``. + + :returns: DataTypeMap which represents the PythonType, Arrow DataType, and + SqlType Enum which this expression represents. + + + + .. 
py:method:: upper() -> Expr + + Converts a string to uppercase. + + + + .. py:method:: variant_name() -> str + + Returns the name of the Expr variant. + + Ex: ``IsNotNull``, ``Literal``, ``BinaryExpr``, etc + + + + .. py:method:: window_frame(window_frame: WindowFrame) -> ExprFuncBuilder + + Set the frame for a window function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:attribute:: __radd__ + + + .. py:attribute:: __rand__ + + + .. py:attribute:: __rmod__ + + + .. py:attribute:: __rmul__ + + + .. py:attribute:: __ror__ + + + .. py:attribute:: __rsub__ + + + .. py:attribute:: __rtruediv__ + + + .. py:attribute:: _to_pyarrow_types + :type: ClassVar[dict[type, pyarrow.DataType]] + + + .. py:attribute:: expr + + +.. py:class:: SortExpr(expr: Expr, ascending: bool, nulls_first: bool) + + Used to specify sorting on either a DataFrame or function. + + This constructor should not be called by the end user. + + + .. py:method:: __repr__() -> str + + Generate a string representation of this expression. + + + + .. py:method:: ascending() -> bool + + Return ascending property. + + + + .. py:method:: expr() -> Expr + + Return the raw expr backing the SortExpr. + + + + .. py:method:: nulls_first() -> bool + + Return nulls_first property. + + + + .. py:attribute:: raw_sort + + +.. py:class:: Window(partition_by: list[Expr] | Expr | None = None, window_frame: WindowFrame | None = None, order_by: list[SortExpr | Expr | str] | Expr | SortExpr | str | None = None, null_treatment: datafusion.common.NullTreatment | None = None) + + Define reusable window parameters. + + Construct a window definition.
+ + :param partition_by: Partitions for window operation + :param window_frame: Define the start and end bounds of the window frame + :param order_by: Set ordering + :param null_treatment: Indicate how nulls are to be treated + + + .. py:attribute:: _null_treatment + :value: None + + + + .. py:attribute:: _order_by + :value: None + + + + .. py:attribute:: _partition_by + :value: None + + + + .. py:attribute:: _window_frame + :value: None + + + +.. py:class:: WindowFrame(units: str, start_bound: Any | None, end_bound: Any | None) + + Defines a window frame for performing window operations. + + Construct a window frame using the given parameters. + + :param units: Should be one of ``rows``, ``range``, or ``groups``. + :param start_bound: Sets the preceding bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + :param end_bound: Sets the following bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + + + .. py:method:: __repr__() -> str + + Print a string representation of the window frame. + + + + .. py:method:: get_frame_units() -> str + + Returns the window frame units for the bounds. + + + + .. py:method:: get_lower_bound() -> WindowFrameBound + + Returns starting bound. + + + + .. py:method:: get_upper_bound() -> WindowFrameBound + + Returns end bound. + + + + .. py:attribute:: window_frame + + +.. py:class:: WindowFrameBound(frame_bound: datafusion._internal.expr.WindowFrameBound) + + Defines a single window frame bound. + + :py:class:`WindowFrame` typically requires a start and end bound. + + Constructs a window frame bound. + + + .. py:method:: get_offset() -> int | None + + Returns the offset of the window frame. + + + + .. py:method:: is_current_row() -> bool + + Returns if the frame bound is current row. + + + + .. py:method:: is_following() -> bool + + Returns if the frame bound is following. + + + + .. 
py:method:: is_preceding() -> bool + + Returns if the frame bound is preceding. + + + + .. py:method:: is_unbounded() -> bool + + Returns if the frame bound is unbounded. + + + + .. py:attribute:: frame_bound + + +.. py:function:: ensure_expr(value: Expr | Any) -> datafusion._internal.expr.Expr + + Return the internal expression from ``Expr`` or raise ``TypeError``. + + This helper rejects plain strings and other non-:class:`Expr` values so + higher level APIs consistently require explicit :func:`~datafusion.col` or + :func:`~datafusion.lit` expressions. + + :param value: Candidate expression or other object. + + :returns: The internal expression representation. + + :raises TypeError: If ``value`` is not an instance of :class:`Expr`. + + +.. py:function:: ensure_expr_list(exprs: collections.abc.Iterable[Expr | collections.abc.Iterable[Expr]]) -> list[datafusion._internal.expr.Expr] + + Flatten an iterable of expressions, validating each via ``ensure_expr``. + + :param exprs: Possibly nested iterable containing expressions. + + :returns: A flat list of raw expressions. + + :raises TypeError: If any item is not an instance of :class:`Expr`. + + +.. py:data:: Aggregate + +.. py:data:: AggregateFunction + +.. py:data:: Alias + +.. py:data:: Analyze + +.. py:data:: Between + +.. py:data:: BinaryExpr + +.. py:data:: Case + +.. py:data:: Cast + +.. py:data:: Column + +.. py:data:: CopyTo + +.. py:data:: CreateCatalog + +.. py:data:: CreateCatalogSchema + +.. py:data:: CreateExternalTable + +.. py:data:: CreateFunction + +.. py:data:: CreateFunctionBody + +.. py:data:: CreateIndex + +.. py:data:: CreateMemoryTable + +.. py:data:: CreateView + +.. py:data:: Deallocate + +.. py:data:: DescribeTable + +.. py:data:: Distinct + +.. py:data:: DmlStatement + +.. py:data:: DropCatalogSchema + +.. py:data:: DropFunction + +.. py:data:: DropTable + +.. py:data:: DropView + +.. 
py:data:: EXPR_TYPE_ERROR + :value: 'Use col()/column() or lit()/literal() to construct expressions' + + +.. py:data:: EmptyRelation + +.. py:data:: Execute + +.. py:data:: Exists + +.. py:data:: Explain + +.. py:data:: Extension + +.. py:data:: FileType + +.. py:data:: Filter + +.. py:data:: GroupingSet + +.. py:data:: ILike + +.. py:data:: InList + +.. py:data:: InSubquery + +.. py:data:: IsFalse + +.. py:data:: IsNotFalse + +.. py:data:: IsNotNull + +.. py:data:: IsNotTrue + +.. py:data:: IsNotUnknown + +.. py:data:: IsNull + +.. py:data:: IsTrue + +.. py:data:: IsUnknown + +.. py:data:: Join + +.. py:data:: JoinConstraint + +.. py:data:: JoinType + +.. py:data:: Like + +.. py:data:: Limit + +.. py:data:: Literal + +.. py:data:: Negative + +.. py:data:: Not + +.. py:data:: OperateFunctionArg + +.. py:data:: Partitioning + +.. py:data:: Placeholder + +.. py:data:: Prepare + +.. py:data:: Projection + +.. py:data:: RecursiveQuery + +.. py:data:: Repartition + +.. py:data:: ScalarSubquery + +.. py:data:: ScalarVariable + +.. py:data:: SetVariable + +.. py:data:: SimilarTo + +.. py:data:: Sort + +.. py:data:: SortKey + +.. py:data:: Subquery + +.. py:data:: SubqueryAlias + +.. py:data:: TableScan + +.. py:data:: TransactionAccessMode + +.. py:data:: TransactionConclusion + +.. py:data:: TransactionEnd + +.. py:data:: TransactionIsolationLevel + +.. py:data:: TransactionStart + +.. py:data:: TryCast + +.. py:data:: Union + +.. py:data:: Unnest + +.. py:data:: UnnestExpr + +.. py:data:: Values + +.. py:data:: WindowExpr + diff --git a/_sources/autoapi/datafusion/functions/index.rst.txt b/_sources/autoapi/datafusion/functions/index.rst.txt new file mode 100644 index 000000000..f1c509093 --- /dev/null +++ b/_sources/autoapi/datafusion/functions/index.rst.txt @@ -0,0 +1,2256 @@ +datafusion.functions +==================== + +.. py:module:: datafusion.functions + +.. autoapi-nested-parse:: + + User functions for operating on :py:class:`~datafusion.expr.Expr`. 
+ + + +Functions +--------- + +.. autoapisummary:: + + datafusion.functions.abs + datafusion.functions.acos + datafusion.functions.acosh + datafusion.functions.alias + datafusion.functions.approx_distinct + datafusion.functions.approx_median + datafusion.functions.approx_percentile_cont + datafusion.functions.approx_percentile_cont_with_weight + datafusion.functions.array + datafusion.functions.array_agg + datafusion.functions.array_append + datafusion.functions.array_cat + datafusion.functions.array_concat + datafusion.functions.array_dims + datafusion.functions.array_distinct + datafusion.functions.array_element + datafusion.functions.array_empty + datafusion.functions.array_except + datafusion.functions.array_extract + datafusion.functions.array_has + datafusion.functions.array_has_all + datafusion.functions.array_has_any + datafusion.functions.array_indexof + datafusion.functions.array_intersect + datafusion.functions.array_join + datafusion.functions.array_length + datafusion.functions.array_ndims + datafusion.functions.array_pop_back + datafusion.functions.array_pop_front + datafusion.functions.array_position + datafusion.functions.array_positions + datafusion.functions.array_prepend + datafusion.functions.array_push_back + datafusion.functions.array_push_front + datafusion.functions.array_remove + datafusion.functions.array_remove_all + datafusion.functions.array_remove_n + datafusion.functions.array_repeat + datafusion.functions.array_replace + datafusion.functions.array_replace_all + datafusion.functions.array_replace_n + datafusion.functions.array_resize + datafusion.functions.array_slice + datafusion.functions.array_sort + datafusion.functions.array_to_string + datafusion.functions.array_union + datafusion.functions.arrow_cast + datafusion.functions.arrow_typeof + datafusion.functions.ascii + datafusion.functions.asin + datafusion.functions.asinh + datafusion.functions.atan + datafusion.functions.atan2 + datafusion.functions.atanh + 
datafusion.functions.avg + datafusion.functions.bit_and + datafusion.functions.bit_length + datafusion.functions.bit_or + datafusion.functions.bit_xor + datafusion.functions.bool_and + datafusion.functions.bool_or + datafusion.functions.btrim + datafusion.functions.cardinality + datafusion.functions.case + datafusion.functions.cbrt + datafusion.functions.ceil + datafusion.functions.char_length + datafusion.functions.character_length + datafusion.functions.chr + datafusion.functions.coalesce + datafusion.functions.col + datafusion.functions.concat + datafusion.functions.concat_ws + datafusion.functions.corr + datafusion.functions.cos + datafusion.functions.cosh + datafusion.functions.cot + datafusion.functions.count + datafusion.functions.count_star + datafusion.functions.covar + datafusion.functions.covar_pop + datafusion.functions.covar_samp + datafusion.functions.cume_dist + datafusion.functions.current_date + datafusion.functions.current_time + datafusion.functions.date_bin + datafusion.functions.date_part + datafusion.functions.date_trunc + datafusion.functions.datepart + datafusion.functions.datetrunc + datafusion.functions.decode + datafusion.functions.degrees + datafusion.functions.dense_rank + datafusion.functions.digest + datafusion.functions.empty + datafusion.functions.encode + datafusion.functions.ends_with + datafusion.functions.exp + datafusion.functions.extract + datafusion.functions.factorial + datafusion.functions.find_in_set + datafusion.functions.first_value + datafusion.functions.flatten + datafusion.functions.floor + datafusion.functions.from_unixtime + datafusion.functions.gcd + datafusion.functions.in_list + datafusion.functions.initcap + datafusion.functions.isnan + datafusion.functions.iszero + datafusion.functions.lag + datafusion.functions.last_value + datafusion.functions.lcm + datafusion.functions.lead + datafusion.functions.left + datafusion.functions.length + datafusion.functions.levenshtein + datafusion.functions.list_append + 
datafusion.functions.list_cat + datafusion.functions.list_concat + datafusion.functions.list_dims + datafusion.functions.list_distinct + datafusion.functions.list_element + datafusion.functions.list_except + datafusion.functions.list_extract + datafusion.functions.list_indexof + datafusion.functions.list_intersect + datafusion.functions.list_join + datafusion.functions.list_length + datafusion.functions.list_ndims + datafusion.functions.list_position + datafusion.functions.list_positions + datafusion.functions.list_prepend + datafusion.functions.list_push_back + datafusion.functions.list_push_front + datafusion.functions.list_remove + datafusion.functions.list_remove_all + datafusion.functions.list_remove_n + datafusion.functions.list_repeat + datafusion.functions.list_replace + datafusion.functions.list_replace_all + datafusion.functions.list_replace_n + datafusion.functions.list_resize + datafusion.functions.list_slice + datafusion.functions.list_sort + datafusion.functions.list_to_string + datafusion.functions.list_union + datafusion.functions.ln + datafusion.functions.log + datafusion.functions.log10 + datafusion.functions.log2 + datafusion.functions.lower + datafusion.functions.lpad + datafusion.functions.ltrim + datafusion.functions.make_array + datafusion.functions.make_date + datafusion.functions.make_list + datafusion.functions.max + datafusion.functions.md5 + datafusion.functions.mean + datafusion.functions.median + datafusion.functions.min + datafusion.functions.named_struct + datafusion.functions.nanvl + datafusion.functions.now + datafusion.functions.nth_value + datafusion.functions.ntile + datafusion.functions.nullif + datafusion.functions.nvl + datafusion.functions.octet_length + datafusion.functions.order_by + datafusion.functions.overlay + datafusion.functions.percent_rank + datafusion.functions.pi + datafusion.functions.pow + datafusion.functions.power + datafusion.functions.radians + datafusion.functions.random + datafusion.functions.range + 
datafusion.functions.rank + datafusion.functions.regexp_count + datafusion.functions.regexp_instr + datafusion.functions.regexp_like + datafusion.functions.regexp_match + datafusion.functions.regexp_replace + datafusion.functions.regr_avgx + datafusion.functions.regr_avgy + datafusion.functions.regr_count + datafusion.functions.regr_intercept + datafusion.functions.regr_r2 + datafusion.functions.regr_slope + datafusion.functions.regr_sxx + datafusion.functions.regr_sxy + datafusion.functions.regr_syy + datafusion.functions.repeat + datafusion.functions.replace + datafusion.functions.reverse + datafusion.functions.right + datafusion.functions.round + datafusion.functions.row_number + datafusion.functions.rpad + datafusion.functions.rtrim + datafusion.functions.sha224 + datafusion.functions.sha256 + datafusion.functions.sha384 + datafusion.functions.sha512 + datafusion.functions.signum + datafusion.functions.sin + datafusion.functions.sinh + datafusion.functions.split_part + datafusion.functions.sqrt + datafusion.functions.starts_with + datafusion.functions.stddev + datafusion.functions.stddev_pop + datafusion.functions.stddev_samp + datafusion.functions.string_agg + datafusion.functions.strpos + datafusion.functions.struct + datafusion.functions.substr + datafusion.functions.substr_index + datafusion.functions.substring + datafusion.functions.sum + datafusion.functions.tan + datafusion.functions.tanh + datafusion.functions.to_hex + datafusion.functions.to_timestamp + datafusion.functions.to_timestamp_micros + datafusion.functions.to_timestamp_millis + datafusion.functions.to_timestamp_nanos + datafusion.functions.to_timestamp_seconds + datafusion.functions.to_unixtime + datafusion.functions.translate + datafusion.functions.trim + datafusion.functions.trunc + datafusion.functions.upper + datafusion.functions.uuid + datafusion.functions.var + datafusion.functions.var_pop + datafusion.functions.var_samp + datafusion.functions.var_sample + datafusion.functions.when + 
datafusion.functions.window + + +Module Contents +--------------- + +.. py:function:: abs(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Return the absolute value of a given number. + + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + + +.. py:function:: acos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. + + +.. py:function:: acosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns inverse hyperbolic cosine. + + +.. py:function:: alias(expr: datafusion.expr.Expr, name: str, metadata: dict[str, str] | None = None) -> datafusion.expr.Expr + + Creates an alias expression with an optional metadata dictionary. + + :param expr: The expression to alias + :param name: The alias name + :param metadata: Optional metadata to attach to the column + + :returns: An expression with the given alias + + +.. py:function:: approx_distinct(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the approximate number of distinct values. + + This aggregate function is similar to :py:func:`count` with distinct set, but it + will approximate the number of distinct entries. It may return significantly faster + than :py:func:`count` for some DataFrames. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Values to check for distinct entries + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: approx_median(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the approximate median value. 
+ + This aggregate function is similar to :py:func:`median`, but it will only + approximate the median. It may return significantly faster for some DataFrames. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Values to find the median for + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: approx_percentile_cont(sort_expression: datafusion.expr.Expr | datafusion.expr.SortExpr, percentile: float, num_centroids: int | None = None, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the value that is approximately at a given percentile of ``sort_expression``. + + This aggregate function assumes the input values form a continuous distribution. + Suppose you have a DataFrame which consists of 100 different test scores. If you + called this function with a percentile of 0.9, it would return the value of the + test score that is above 90% of the other test scores. The returned value may be + between two of the values. + + This function uses the `t-digest <https://arxiv.org/abs/1902.04023>`_ algorithm to + compute the percentile. You can limit the number of bins used in this algorithm by + setting the ``num_centroids`` parameter. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param sort_expression: Values for which to find the approximate percentile + :param percentile: This must be between 0.0 and 1.0, inclusive + :param num_centroids: Max bin size for the t-digest algorithm + :param filter: If provided, only compute against rows for which the filter is True + + +.. 
py:function:: approx_percentile_cont_with_weight(sort_expression: datafusion.expr.Expr | datafusion.expr.SortExpr, weight: datafusion.expr.Expr, percentile: float, num_centroids: int | None = None, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the value of the weighted approximate percentile. + + This aggregate function is similar to :py:func:`approx_percentile_cont` except that + it uses the associated weights. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param sort_expression: Values for which to find the approximate percentile + :param weight: Relative weight for each of the values in ``sort_expression`` + :param percentile: This must be between 0.0 and 1.0, inclusive + :param num_centroids: Max bin size for the t-digest algorithm + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array using the specified input expressions. + + This is an alias for :py:func:`make_array`. + + +.. py:function:: array_agg(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Aggregate values into an array. + + Currently ``distinct`` and ``order_by`` cannot be used together. As a workaround, + consider :py:func:`array_sort` after aggregation. + `Issue Tracker <https://github.com/apache/datafusion/issues/12371>`_ + + If using the builder functions described in ref:`_aggregation` this function ignores + the option ``null_treatment``. 
+ + :param expression: Values to combine into an array + :param distinct: If True, a single entry for each distinct value will be in the result + :param filter: If provided, only compute against rows for which the filter is True + :param order_by: Order the resultant array values. Accepts column names or expressions. + + For example:: + + df.aggregate([], array_agg(col("a"), order_by="b")) + + +.. py:function:: array_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Appends an element to the end of an array. + + +.. py:function:: array_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`. + + +.. py:function:: array_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the input arrays. + + +.. py:function:: array_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array of the array's dimensions. + + +.. py:function:: array_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns distinct values from the array after removing duplicates. + + +.. py:function:: array_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts the element with the index n from the array. + + +.. py:function:: array_empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns a boolean indicating whether the array is empty. + + +.. py:function:: array_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the elements that appear in ``array1`` but not in ``array2``. + + +.. py:function:: array_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + + +.. 
py:function:: array_has(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns true if the element appears in the first array, otherwise false. + + +.. py:function:: array_has_all(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Determines if there is a complete overlap of ``second_array`` in ``first_array``. + + Returns true if each element of the second array appears in the first array. + Otherwise, it returns false. + + +.. py:function:: array_has_any(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Determines if there is an overlap between ``first_array`` and ``second_array``. + + Returns true if at least one element of the second array appears in the first + array. Otherwise, it returns false. + + +.. py:function:: array_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr + + Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. + + +.. py:function:: array_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the intersection of ``array1`` and ``array2``. + + +.. py:function:: array_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + + +.. py:function:: array_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the length of the array. + + +.. py:function:: array_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of dimensions of the array. + + +.. py:function:: array_pop_back(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the array without the last element. + + +.. 
py:function:: array_pop_front(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the array without the first element. + + +.. py:function:: array_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr + + Return the position of the first occurrence of ``element`` in ``array``. + + +.. py:function:: array_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Searches for an element in the array and returns all occurrences. + + +.. py:function:: array_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Prepends an element to the beginning of an array. + + +.. py:function:: array_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. + + +.. py:function:: array_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + + +.. py:function:: array_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes the first element from the array equal to the given value. + + +.. py:function:: array_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all elements from the array equal to the given value. + + +.. py:function:: array_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes the first ``max`` elements from the array equal to the given value. + + +.. py:function:: array_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array containing ``element`` ``count`` times. + + +.. 
py:function:: array_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces the first occurrence of ``from_val`` with ``to_val``. + + +.. py:function:: array_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces all occurrences of ``from_val`` with ``to_val``. + + +.. py:function:: array_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replace ``n`` occurrences of ``from_val`` with ``to_val``. + + Replaces the first ``max`` occurrences of the specified element with another + specified element. + + +.. py:function:: array_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array with the specified size filled. + + If ``size`` is greater than the ``array`` length, the additional entries will + be filled with the given ``value``. + + +.. py:function:: array_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns a slice of the array. + + +.. py:function:: array_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr + + Sort an array. + + :param array: The input array to sort. + :param descending: If True, sorts in descending order. + :param null_first: If True, nulls will be returned at the beginning of the array. + + +.. py:function:: array_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts each element to its text representation. + + +.. 
py:function:: array_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array of the elements in the union of ``array1`` and ``array2``. + + Duplicate rows will not be returned. + + +.. py:function:: arrow_cast(expr: datafusion.expr.Expr, data_type: datafusion.expr.Expr) -> datafusion.expr.Expr + + Casts an expression to a specified data type. + + +.. py:function:: arrow_typeof(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the Arrow type of the expression. + + +.. py:function:: ascii(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the numeric code of the first character of the argument. + + +.. py:function:: asin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the arc sine or inverse sine of a number. + + +.. py:function:: asinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns inverse hyperbolic sine. + + +.. py:function:: atan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns inverse tangent of a number. + + +.. py:function:: atan2(y: datafusion.expr.Expr, x: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns inverse tangent of a division given in the argument. + + +.. py:function:: atanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns inverse hyperbolic tangent. + + +.. py:function:: avg(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the average value. + + This aggregate function expects a numeric expression and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Numeric values to average + :param filter: If provided, only compute against rows for which the filter is True + + +.. 
py:function:: bit_and(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the bitwise AND of the argument. + + This aggregate function will bitwise compare every value in the input partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Argument to perform bitwise calculation on + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: bit_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of bits in the string argument. + + +.. py:function:: bit_or(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the bitwise OR of the argument. + + This aggregate function will bitwise compare every value in the input partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Argument to perform bitwise calculation on + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: bit_xor(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the bitwise XOR of the argument. + + This aggregate function will bitwise compare every value in the input partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. + + :param expression: Argument to perform bitwise calculation on + :param distinct: If True, evaluate each unique value of expression only once + :param filter: If provided, only compute against rows for which the filter is True + + +.. 
py:function:: bool_and(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the boolean AND of the argument. + + This aggregate function will compare every value in the input partition. These are + expected to be boolean values. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Argument to perform calculation on + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: bool_or(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the boolean OR of the argument. + + This aggregate function will compare every value in the input partition. These are + expected to be boolean values. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: Argument to perform calculation on + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: btrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all characters, spaces by default, from both sides of a string. + + +.. py:function:: cardinality(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the total number of elements in the array. + + +.. py:function:: case(expr: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder + + Create a case expression. + + Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the + expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for + detailed usage. + + +.. py:function:: cbrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the cube root of a number. + + +.. 
py:function:: ceil(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the nearest integer greater than or equal to the argument. + + +.. py:function:: char_length(string: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of characters in the ``string``. + + +.. py:function:: character_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of characters in the argument. + + +.. py:function:: chr(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts the Unicode code point to a UTF8 character. + + +.. py:function:: coalesce(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the value of the first expr in ``args`` which is not NULL. + + +.. py:function:: col(name: str) -> datafusion.expr.Expr + + Creates a column reference expression. + + +.. py:function:: concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the text representations of all the arguments. + + NULL arguments are ignored. + + +.. py:function:: concat_ws(separator: str, *args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the list ``args`` with the separator. + + ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. + + +.. py:function:: corr(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the correlation coefficient between ``value_y`` and ``value_x``. + + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param value_y: The dependent variable for correlation + :param value_x: The independent variable for correlation + :param filter: If provided, only compute against rows for which the filter is True + + +.. 
py:function:: cos(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the cosine of the argument. + + +.. py:function:: cosh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the hyperbolic cosine of the argument. + + +.. py:function:: cot(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the cotangent of the argument. + + +.. py:function:: count(expressions: datafusion.expr.Expr | list[datafusion.expr.Expr] | None = None, distinct: bool = False, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the number of rows that match the given arguments. + + This aggregate function will count the non-null rows provided in the expression. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. + + :param expressions: Expression or list of expressions whose non-null rows are counted + :param distinct: If True, each distinct value is counted only once + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: count_star(filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Create a COUNT(1) aggregate expression. + + This aggregate function will count all of the rows in the partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``distinct``, and ``null_treatment``. + + :param filter: If provided, only count rows for which the filter is True + + +.. py:function:: covar(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample covariance. + + This is an alias for :py:func:`covar_samp`. + + +.. py:function:: covar_pop(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the population covariance. 
+ + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param value_y: The dependent variable for covariance + :param value_x: The independent variable for covariance + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: covar_samp(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample covariance. + + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param value_y: The dependent variable for covariance + :param value_x: The independent variable for covariance + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: cume_dist(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a cumulative distribution window function. + + This window function is similar to :py:func:`rank` except that the returned values + are the ratio of the row number to the total number of rows. Here is an example of a + dataframe with a window ordered by descending ``points`` and the associated + cumulative distribution:: + + +--------+-----------+ + | points | cume_dist | + +--------+-----------+ + | 100 | 0.5 | + | 100 | 0.5 | + | 50 | 0.75 | + | 25 | 1.0 | + +--------+-----------+ + + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. 
+ + For example:: + + cume_dist(order_by="points") + + +.. py:function:: current_date() -> datafusion.expr.Expr + + Returns current UTC date as a Date32 value. + + +.. py:function:: current_time() -> datafusion.expr.Expr + + Returns current UTC time as a Time64 value. + + +.. py:function:: date_bin(stride: datafusion.expr.Expr, source: datafusion.expr.Expr, origin: datafusion.expr.Expr) -> datafusion.expr.Expr + + Coerces an arbitrary timestamp to the start of the nearest specified interval. + + +.. py:function:: date_part(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts a subfield from the date. + + +.. py:function:: date_trunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr + + Truncates the date to a specified level of precision. + + +.. py:function:: datepart(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr + + Return a specified part of a date. + + This is an alias for :py:func:`date_part`. + + +.. py:function:: datetrunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr + + Truncates the date to a specified level of precision. + + This is an alias for :py:func:`date_trunc`. + + +.. py:function:: decode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr + + Decode the ``expr``, using the ``encoding``. ``encoding`` can be base64 or hex. + + +.. py:function:: degrees(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts the argument from radians to degrees. + + +.. py:function:: dense_rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a dense_rank window function. + + This window function is similar to :py:func:`rank` except that the returned values + will be consecutive. 
Here is an example of a dataframe with a window ordered by + descending ``points`` and the associated dense rank:: + + +--------+------------+ + | points | dense_rank | + +--------+------------+ + | 100 | 1 | + | 100 | 1 | + | 50 | 2 | + | 25 | 3 | + +--------+------------+ + + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + dense_rank(order_by="points") + + +.. py:function:: digest(value: datafusion.expr.Expr, method: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes the binary hash of an expression using the specified algorithm. + + Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, + blake2b, and blake3. + + +.. py:function:: empty(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + This is an alias for :py:func:`array_empty`. + + +.. py:function:: encode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) -> datafusion.expr.Expr + + Encode the ``input``, using the ``encoding``. encoding can be base64 or hex. + + +.. py:function:: ends_with(arg: datafusion.expr.Expr, suffix: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns true if the ``string`` ends with the ``suffix``, false otherwise. + + +.. py:function:: exp(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the exponential of the argument. + + +.. py:function:: extract(part: datafusion.expr.Expr, date: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts a subfield from the date. + + This is an alias for :py:func:`date_part`. + + +.. py:function:: factorial(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the factorial of the argument. + + +.. py:function:: find_in_set(string: datafusion.expr.Expr, string_list: datafusion.expr.Expr) -> datafusion.expr.Expr + + Find a string in a list of strings. 
+ + Returns a value in the range of 1 to N if the string is in the string list + ``string_list`` consisting of N substrings. + + The string list is a string composed of substrings separated by ``,`` characters. + + +.. py:function:: first_value(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr + + Returns the first value in a group of values. + + This aggregate function will return the first value in the partition. + + If using the builder functions described in :ref:`aggregation` this function ignores + the option ``distinct``. + + :param expression: The expression to return the first value of + :param filter: If provided, only compute against rows for which the filter is True + :param order_by: Set the ordering of the expression to evaluate. Accepts + column names or expressions. + :param null_treatment: Assign whether to respect or ignore null values. + + For example:: + + df.aggregate([], first_value(col("a"), order_by="ts")) + + +.. py:function:: flatten(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Flattens an array of arrays into a single array. + + +.. py:function:: floor(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the nearest integer less than or equal to the argument. + + +.. py:function:: from_unixtime(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts an integer to RFC3339 timestamp format string. + + +.. py:function:: gcd(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the greatest common divisor. + + +.. py:function:: in_list(arg: datafusion.expr.Expr, values: list[datafusion.expr.Expr], negated: bool = False) -> datafusion.expr.Expr + + Returns whether the argument is contained within the list ``values``. + + +..
py:function:: initcap(string: datafusion.expr.Expr) -> datafusion.expr.Expr + + Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the remaining + characters to lowercase. + + +.. py:function:: isnan(expr: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns true if a given number is +NaN or -NaN otherwise returns false. + + +.. py:function:: iszero(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns true if a given number is +0.0 or -0.0 otherwise returns false. + + +.. py:function:: lag(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Any | None = None, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a lag window function. + + Lag operation will return the argument that is in the previous shift_offset-th row + in the partition. For example ``lag(col("b"), shift_offset=3, default_value=5)`` + will return the 3rd previous value in column ``b``. At the beginning of the + partition, where no values can be returned it will return the default value of 5. + + Here is an example of both the ``lag`` and :py:func:`datafusion.functions.lead` + functions on a simple DataFrame:: + + +--------+------+-----+ + | points | lead | lag | + +--------+------+-----+ + | 100 | 100 | | + | 100 | 50 | 100 | + | 50 | 25 | 100 | + | 25 | | 50 | + +--------+------+-----+ + + :param arg: Value to return + :param shift_offset: Number of rows before the current row. + :param default_value: Value to return if shift_offset row does not exist. + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + lag(col("b"), order_by="ts") + + +..
py:function:: last_value(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr + + Returns the last value in a group of values. + + This aggregate function will return the last value in the partition. + + If using the builder functions described in :ref:`aggregation` this function ignores + the option ``distinct``. + + :param expression: The expression to return the last value of + :param filter: If provided, only compute against rows for which the filter is True + :param order_by: Set the ordering of the expression to evaluate. Accepts + column names or expressions. + :param null_treatment: Assign whether to respect or ignore null values. + + For example:: + + df.aggregate([], last_value(col("a"), order_by="ts")) + + +.. py:function:: lcm(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the least common multiple. + + +.. py:function:: lead(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Any | None = None, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a lead window function. + + Lead operation will return the argument that is in the next shift_offset-th row in + the partition. For example ``lead(col("b"), shift_offset=3, default_value=5)`` will + return the 3rd following value in column ``b``. At the end of the partition, where + no further values can be returned it will return the default value of 5.
+ + Here is an example of both the ``lead`` and :py:func:`datafusion.functions.lag` + functions on a simple DataFrame:: + + +--------+------+-----+ + | points | lead | lag | + +--------+------+-----+ + | 100 | 100 | | + | 100 | 50 | 100 | + | 50 | 25 | 100 | + | 25 | | 50 | + +--------+------+-----+ + + To set window function parameters use the window builder approach described in the + :ref:`window_functions` online documentation. + + :param arg: Value to return + :param shift_offset: Number of rows following the current row. + :param default_value: Value to return if shift_offset row does not exist. + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + lead(col("b"), order_by="ts") + + +.. py:function:: left(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the first ``n`` characters in the ``string``. + + +.. py:function:: length(string: datafusion.expr.Expr) -> datafusion.expr.Expr + + The number of characters in the ``string``. + + +.. py:function:: levenshtein(string1: datafusion.expr.Expr, string2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the Levenshtein distance between the two given strings. + + +.. py:function:: list_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. + + +.. py:function:: list_cat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + + +.. py:function:: list_concat(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + + +..
py:function:: list_dims(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. + + +.. py:function:: list_distinct(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. + + +.. py:function:: list_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + + +.. py:function:: list_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the elements that appear in ``array1`` but not in the ``array2``. + + This is an alias for :py:func:`array_except`. + + +.. py:function:: list_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + + +.. py:function:: list_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr + + Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. + + +.. py:function:: list_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the intersection of ``array1`` and ``array2``. + + This is an alias for :py:func:`array_intersect`. + + +.. py:function:: list_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + + +.. py:function:: list_length(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the length of the array. + + This is an alias for :py:func:`array_length`. + + +..
py:function:: list_ndims(array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. + + +.. py:function:: list_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) -> datafusion.expr.Expr + + Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. + + +.. py:function:: list_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Searches for an element in the array and returns all occurrences. + + This is an alias for :py:func:`array_positions`. + + +.. py:function:: list_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + + +.. py:function:: list_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. + + +.. py:function:: list_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) -> datafusion.expr.Expr + + Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + + +.. py:function:: list_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes the first element from the array equal to the given value. + + This is an alias for :py:func:`array_remove`. + + +.. py:function:: list_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all elements from the array equal to the given value. + + This is an alias for :py:func:`array_remove_all`. + + +.. 
py:function:: list_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes the first ``max`` elements from the array equal to the given value. + + This is an alias for :py:func:`array_remove_n`. + + +.. py:function:: list_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array containing ``element`` ``count`` times. + + This is an alias for :py:func:`array_repeat`. + + +.. py:function:: list_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces the first occurrence of ``from_val`` with ``to_val``. + + This is an alias for :py:func:`array_replace`. + + +.. py:function:: list_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces all occurrences of ``from_val`` with ``to_val``. + + This is an alias for :py:func:`array_replace_all`. + + +.. py:function:: list_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replace ``n`` occurrences of ``from_val`` with ``to_val``. + + Replaces the first ``max`` occurrences of the specified element with another + specified element. + + This is an alias for :py:func:`array_replace_n`. + + +.. py:function:: list_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array with the specified size filled. + + If ``size`` is greater than the ``array`` length, the additional entries will be + filled with the given ``value``. This is an alias for :py:func:`array_resize`. + + +.. 
py:function:: list_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns a slice of the array. + + This is an alias for :py:func:`array_slice`. + + +.. py:function:: list_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) -> datafusion.expr.Expr + + This is an alias for :py:func:`array_sort`. + + +.. py:function:: list_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + + +.. py:function:: list_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array of the elements in the union of array1 and array2. + + Duplicate rows will not be returned. + + This is an alias for :py:func:`array_union`. + + +.. py:function:: ln(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the natural logarithm (base e) of the argument. + + +.. py:function:: log(base: datafusion.expr.Expr, num: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the logarithm of a number for a particular ``base``. + + +.. py:function:: log10(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Base 10 logarithm of the argument. + + +.. py:function:: log2(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Base 2 logarithm of the argument. + + +.. py:function:: lower(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string to lowercase. + + +.. py:function:: lpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Add left padding to a string. + + Extends the string to length ``count`` by prepending the fill ``characters`` (a + space by default). If the string is already longer than ``count`` then it is + truncated (on the right). + + +..
py:function:: ltrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all characters, spaces by default, from the beginning of a string. + + +.. py:function:: make_array(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array using the specified input expressions. + + +.. py:function:: make_date(year: datafusion.expr.Expr, month: datafusion.expr.Expr, day: datafusion.expr.Expr) -> datafusion.expr.Expr + + Make a date from year, month and day component parts. + + +.. py:function:: make_list(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an array using the specified input expressions. + + This is an alias for :py:func:`make_array`. + + +.. py:function:: max(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Aggregate function that returns the maximum value of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The value to find the maximum of + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: md5(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes an MD5 128-bit checksum for a string expression. + + +.. py:function:: mean(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the average (mean) value of the argument. + + This is an alias for :py:func:`avg`. + + +.. py:function:: median(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the median of a set of numbers. + + This aggregate function returns the median value of the expression for the given + aggregate function. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. 
+ + :param expression: The value to compute the median of + :param distinct: If True, a single entry for each distinct value will be in the result + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: min(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Aggregate function that returns the minimum value of the argument. + + If using the builder functions described in :ref:`aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The value to find the minimum of + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: named_struct(name_pairs: list[tuple[str, datafusion.expr.Expr]]) -> datafusion.expr.Expr + + Returns a struct with the given names and arguments pairs. + + +.. py:function:: nanvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``. + + +.. py:function:: now() -> datafusion.expr.Expr + + Returns the current timestamp in nanoseconds. + + This will use the same value for all instances of now() in same statement. + + +.. py:function:: nth_value(expression: datafusion.expr.Expr, n: int, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) -> datafusion.expr.Expr + + Returns the n-th value in a group of values. + + This aggregate function will return the n-th value in the partition. + + If using the builder functions described in :ref:`aggregation` this function ignores + the option ``distinct``. + + :param expression: The expression to return the n-th value of + :param n: Index of value to return. Starts at 1.
+ :param filter: If provided, only compute against rows for which the filter is True + :param order_by: Set the ordering of the expression to evaluate. Accepts + column names or expressions. + :param null_treatment: Assign whether to respect or ignore null values. + + For example:: + + df.aggregate([], nth_value(col("a"), 2, order_by="ts")) + + +.. py:function:: ntile(groups: int, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a n-tile window function. + + This window function orders the window frame into a given number of groups based on + the ordering criteria. It then returns which group the current row is assigned to. + Here is an example of a dataframe with a window ordered by descending ``points`` + and the associated n-tile function:: + + +--------+-------+ + | points | ntile | + +--------+-------+ + | 120 | 1 | + | 100 | 1 | + | 80 | 2 | + | 60 | 2 | + | 40 | 3 | + | 20 | 3 | + +--------+-------+ + + :param groups: Number of groups for the n-tile to be divided into. + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + ntile(3, order_by="points") + + +.. py:function:: nullif(expr1: datafusion.expr.Expr, expr2: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns NULL if expr1 equals expr2; otherwise it returns expr1. + + This can be used to perform the inverse operation of the COALESCE expression. + + +.. py:function:: nvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``. + + +.. py:function:: octet_length(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the number of bytes of a string. + + +..
py:function:: order_by(expr: datafusion.expr.Expr, ascending: bool = True, nulls_first: bool = True) -> datafusion.expr.SortExpr + + Creates a new sort expression. + + +.. py:function:: overlay(string: datafusion.expr.Expr, substring: datafusion.expr.Expr, start: datafusion.expr.Expr, length: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Replace a substring with a new substring. + + Replace the substring of string that starts at the ``start``'th character and + extends for ``length`` characters with new substring. + + +.. py:function:: percent_rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a percent_rank window function. + + This window function is similar to :py:func:`rank` except that the returned values + are the percentage from 0.0 to 1.0 from first to last. Here is an example of a + dataframe with a window ordered by descending ``points`` and the associated percent + rank:: + + +--------+--------------+ + | points | percent_rank | + +--------+--------------+ + | 100 | 0.0 | + | 100 | 0.0 | + | 50 | 0.666667 | + | 25 | 1.0 | + +--------+--------------+ + + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + percent_rank(order_by="points") + + +.. py:function:: pi() -> datafusion.expr.Expr + + Returns an approximate value of π. + + +.. py:function:: pow(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns ``base`` raised to the power of ``exponent``. + + This is an alias of :py:func:`power`. + + +.. py:function:: power(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns ``base`` raised to the power of ``exponent``. + + +.. 
py:function:: radians(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts the argument from degrees to radians. + + +.. py:function:: random() -> datafusion.expr.Expr + + Returns a random value in the range ``0.0 <= x < 1.0``. + + +.. py:function:: range(start: datafusion.expr.Expr, stop: datafusion.expr.Expr, step: datafusion.expr.Expr) -> datafusion.expr.Expr + + Create a list of values in the range between start and stop. + + +.. py:function:: rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a rank window function. + + Returns the rank based upon the window order. Consecutive equal values will receive + the same rank, but the next different value will not be consecutive but rather the + number of rows that precede it plus one. This is similar to Olympic medals. If two + people tie for gold, the next place is bronze. There would be no silver medal. Here + is an example of a dataframe with a window ordered by descending ``points`` and the + associated rank. + + You should set ``order_by`` to produce meaningful results:: + + +--------+------+ + | points | rank | + +--------+------+ + | 100 | 1 | + | 100 | 1 | + | 50 | 3 | + | 25 | 4 | + +--------+------+ + + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + rank(order_by="points") + + +.. py:function:: regexp_count(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, start: datafusion.expr.Expr | None = None, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the number of matches in a string. + + Optional start position (the first position is 1) to search for the regular + expression. + + +.. 
py:function:: regexp_instr(values: datafusion.expr.Expr, regex: datafusion.expr.Expr, start: datafusion.expr.Expr | None = None, n: datafusion.expr.Expr | None = None, flags: datafusion.expr.Expr | None = None, sub_expr: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Returns the position of a regular expression match in a string. + + Searches ``values`` for the ``n``-th occurrence of ``regex``, starting at position + ``start`` (the first position is 1). Returns the starting or ending position based + on ``end_position``. Use ``flags`` to control regex behavior and ``sub_expr`` to + return the position of a specific capture group instead of the entire match. + + +.. py:function:: regexp_like(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Find if any regular expression (regex) matches exist. + + Tests a string using a regular expression returning true if at least one match, + false otherwise. + + +.. py:function:: regexp_match(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Perform regular expression (regex) matching. + + Returns an array with each element containing the leftmost-first match of the + corresponding index in ``regex`` to string in ``string``. + + +.. py:function:: regexp_replace(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, replacement: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Replaces substring(s) matching a PCRE-like regular expression. + + The full list of supported features and syntax can be found at + <https://docs.rs/regex/latest/regex/#syntax> + + Supported flags with the addition of 'g' can be found at + <https://docs.rs/regex/latest/regex/#grouping-and-flags> + + +.. py:function:: regr_avgx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the average of the independent variable ``x``.
+ + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_avgy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the average of the dependent variable ``y``. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_count(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Counts the number of rows in which both expressions are not null. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_intercept(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the intercept from the linear regression. 
+ + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_r2(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the R-squared value from linear regression. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_slope(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the slope from linear regression. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_sxx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sum of squares of the independent variable ``x``. 
+ + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_sxy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sum of products of pairs of numbers. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: regr_syy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sum of squares of the dependent variable ``y``. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param y: The linear regression dependent variable + :param x: The linear regression independent variable + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: repeat(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Repeats the ``string`` to ``n`` times. + + +.. 
py:function:: replace(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``. + + +.. py:function:: reverse(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Reverse the string argument. + + +.. py:function:: right(string: datafusion.expr.Expr, n: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the last ``n`` characters in the ``string``. + + +.. py:function:: round(value: datafusion.expr.Expr, decimal_places: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Round the argument to the nearest integer. + + If the optional ``decimal_places`` is specified, round to the nearest number of + decimal places. You can specify a negative number of decimal places. For example + ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. + + +.. py:function:: row_number(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Create a row number window function. + + Returns the row number of the window function. + + Here is an example of the ``row_number`` on a simple DataFrame:: + + +--------+------------+ + | points | row number | + +--------+------------+ + | 100 | 1 | + | 100 | 2 | + | 50 | 3 | + | 25 | 4 | + +--------+------------+ + + :param partition_by: Expressions to partition the window frame on. + :param order_by: Set ordering within the window frame. Accepts + column names or expressions. + + For example:: + + row_number(order_by="points") + + +.. py:function:: rpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Add right padding to a string. + + Extends the string to length ``count`` by appending ``characters`` (a space + by default). 
If the string is already longer than ``count`` then it is truncated. + + +.. py:function:: rtrim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all characters, spaces by default, from the end of a string. + + +.. py:function:: sha224(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes the SHA-224 hash of a binary string. + + +.. py:function:: sha256(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes the SHA-256 hash of a binary string. + + +.. py:function:: sha384(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes the SHA-384 hash of a binary string. + + +.. py:function:: sha512(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Computes the SHA-512 hash of a binary string. + + +.. py:function:: signum(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the sign of the argument (-1, 0, +1). + + +.. py:function:: sin(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the sine of the argument. + + +.. py:function:: sinh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the hyperbolic sine of the argument. + + +.. py:function:: split_part(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, index: datafusion.expr.Expr) -> datafusion.expr.Expr + + Split a string and return one part. + + Splits a string based on a delimiter and picks out the desired field based + on the index. + + +.. py:function:: sqrt(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the square root of the argument. + + +.. py:function:: starts_with(string: datafusion.expr.Expr, prefix: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns true if string starts with prefix. + + +.. py:function:: stddev(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the standard deviation of the argument. 
+ + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The value to compute the standard deviation of + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: stddev_pop(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the population standard deviation of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The value to compute the population standard deviation of + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: stddev_samp(arg: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample standard deviation of the argument. + + This is an alias for :py:func:`stddev`. + + +.. py:function:: string_agg(expression: datafusion.expr.Expr, delimiter: str, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) -> datafusion.expr.Expr + + Concatenates the input strings. + + This aggregate function will concatenate input strings, ignoring null values, and + separating them with the specified delimiter. Non-string values will be converted to + their string equivalents. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``distinct`` and ``null_treatment``. + + :param expression: The values to concatenate + :param delimiter: Text to place between each value of expression + :param filter: If provided, only compute against rows for which the filter is True + :param order_by: Set the ordering of the expression to evaluate. Accepts + column names or expressions. 
+ + For example:: + + df.aggregate([], string_agg(col("a"), ",", order_by="b")) + + +.. py:function:: strpos(string: datafusion.expr.Expr, substring: datafusion.expr.Expr) -> datafusion.expr.Expr + + Finds the position from where the ``substring`` matches the ``string``. + + +.. py:function:: struct(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns a struct with the given arguments. + + +.. py:function:: substr(string: datafusion.expr.Expr, position: datafusion.expr.Expr) -> datafusion.expr.Expr + + Substring from the ``position`` to the end. + + +.. py:function:: substr_index(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, count: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns an indexed substring. + + The return will be the ``string`` from before ``count`` occurrences of + ``delimiter``. + + +.. py:function:: substring(string: datafusion.expr.Expr, position: datafusion.expr.Expr, length: datafusion.expr.Expr) -> datafusion.expr.Expr + + Substring from the ``position`` with ``length`` characters. + + +.. py:function:: sum(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sum of a set of numbers. + + This aggregate function expects a numeric expression. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The numeric values to sum + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: tan(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the tangent of the argument. + + +.. py:function:: tanh(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Returns the hyperbolic tangent of the argument. + + +.. py:function:: to_hex(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts an integer to a hexadecimal string. + + +.. 
py:function:: to_timestamp(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a ``Timestamp`` in nanoseconds. + + For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package. + + `Documentation here. <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_ + + +.. py:function:: to_timestamp_micros(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a ``Timestamp`` in microseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + + +.. py:function:: to_timestamp_millis(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a ``Timestamp`` in milliseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + + +.. py:function:: to_timestamp_nanos(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a ``Timestamp`` in nanoseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + + +.. py:function:: to_timestamp_seconds(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a ``Timestamp`` in seconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + + +.. py:function:: to_unixtime(string: datafusion.expr.Expr, *format_arguments: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string and optional formats to a Unixtime. + + +.. py:function:: translate(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) -> datafusion.expr.Expr + + Replaces the characters in ``from_val`` with the counterpart in ``to_val``. + + +.. 
py:function:: trim(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Removes all characters, spaces by default, from both sides of a string. + + +.. py:function:: trunc(num: datafusion.expr.Expr, precision: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Truncate the number toward zero with optional precision. + + +.. py:function:: upper(arg: datafusion.expr.Expr) -> datafusion.expr.Expr + + Converts a string to uppercase. + + +.. py:function:: uuid() -> datafusion.expr.Expr + + Returns uuid v4 as a string value. + + +.. py:function:: var(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample variance of the argument. + + This is an alias for :py:func:`var_samp`. + + +.. py:function:: var_pop(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the population variance of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The variable to compute the variance for + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: var_samp(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample variance of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + :param expression: The variable to compute the variance for + :param filter: If provided, only compute against rows for which the filter is True + + +.. py:function:: var_sample(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) -> datafusion.expr.Expr + + Computes the sample variance of the argument. + + This is an alias for :py:func:`var_samp`. + + +.. 
py:function:: when(when: datafusion.expr.Expr, then: datafusion.expr.Expr) -> datafusion.expr.CaseBuilder + + Create a case expression that has no base expression. + + Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the + expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for + detailed usage. + + +.. py:function:: window(name: str, args: list[datafusion.expr.Expr], partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, window_frame: datafusion.expr.WindowFrame | None = None, filter: datafusion.expr.Expr | None = None, distinct: bool = False, ctx: datafusion.context.SessionContext | None = None) -> datafusion.expr.Expr + + Creates a new Window function expression. + + This interface will soon be deprecated. Instead of using this interface, + users should call the window functions directly. For example, to perform a + lag use:: + + df.select(functions.lag(col("a")).partition_by(col("b")).build()) + + The ``order_by`` parameter accepts column names or expressions, e.g.:: + + window("lag", [col("a")], order_by="ts") + + diff --git a/_sources/autoapi/datafusion/html_formatter/index.rst.txt b/_sources/autoapi/datafusion/html_formatter/index.rst.txt new file mode 100644 index 000000000..2b9efd822 --- /dev/null +++ b/_sources/autoapi/datafusion/html_formatter/index.rst.txt @@ -0,0 +1,11 @@ +datafusion.html_formatter +========================= + +.. py:module:: datafusion.html_formatter + +.. autoapi-nested-parse:: + + Deprecated module for dataframe formatting. + + + diff --git a/_sources/autoapi/datafusion/index.rst.txt b/_sources/autoapi/datafusion/index.rst.txt new file mode 100644 index 000000000..24fecff4f --- /dev/null +++ b/_sources/autoapi/datafusion/index.rst.txt @@ -0,0 +1,2750 @@ +datafusion +========== + +.. py:module:: datafusion + +.. autoapi-nested-parse:: + + DataFusion python package. 
+ + This is a Python library that binds to Apache Arrow in-memory query engine DataFusion. + See https://datafusion.apache.org/python for more information. + + + +Submodules +---------- + +.. toctree:: + :maxdepth: 1 + + /autoapi/datafusion/catalog/index + /autoapi/datafusion/context/index + /autoapi/datafusion/dataframe/index + /autoapi/datafusion/dataframe_formatter/index + /autoapi/datafusion/expr/index + /autoapi/datafusion/functions/index + /autoapi/datafusion/html_formatter/index + /autoapi/datafusion/input/index + /autoapi/datafusion/io/index + /autoapi/datafusion/object_store/index + /autoapi/datafusion/options/index + /autoapi/datafusion/plan/index + /autoapi/datafusion/record_batch/index + /autoapi/datafusion/substrait/index + /autoapi/datafusion/unparser/index + /autoapi/datafusion/user_defined/index + + +Attributes +---------- + +.. autoapisummary:: + + datafusion.DFSchema + datafusion.col + datafusion.column + datafusion.udaf + datafusion.udf + datafusion.udtf + datafusion.udwf + + +Classes +------- + +.. autoapisummary:: + + datafusion.Accumulator + datafusion.AggregateUDF + datafusion.Catalog + datafusion.CsvReadOptions + datafusion.DataFrameWriteOptions + datafusion.Database + datafusion.ExecutionPlan + datafusion.Expr + datafusion.InsertOp + datafusion.LogicalPlan + datafusion.ParquetColumnOptions + datafusion.ParquetWriterOptions + datafusion.RecordBatch + datafusion.RecordBatchStream + datafusion.RuntimeEnvBuilder + datafusion.SQLOptions + datafusion.ScalarUDF + datafusion.SessionConfig + datafusion.Table + datafusion.TableFunction + datafusion.WindowFrame + datafusion.WindowUDF + + +Functions +--------- + +.. autoapisummary:: + + datafusion.configure_formatter + datafusion.lit + datafusion.literal + datafusion.read_avro + datafusion.read_csv + datafusion.read_json + datafusion.read_parquet + + +Package Contents +---------------- + +.. py:class:: Accumulator + + Defines how an :py:class:`AggregateUDF` accumulates values. + + + .. 
py:method:: evaluate() -> pyarrow.Scalar + :abstractmethod: + + + Return the resultant value. + + While this function template expects a PyArrow Scalar value return type, + you can return any value that can be converted into a Scalar. This + includes basic Python data types such as integers and strings. In + addition to primitive types, we currently support PyArrow, nanoarrow, + and arro3 objects. Other objects + that support the Arrow FFI standard will be given a "best attempt" at + conversion to scalar objects. + + + + .. py:method:: merge(states: list[pyarrow.Array]) -> None + :abstractmethod: + + + Merge a set of states. + + + + .. py:method:: state() -> list[pyarrow.Scalar] + :abstractmethod: + + + Return the current state. + + While this function template expects PyArrow Scalar values as the return type, + you can return any value that can be converted into a Scalar. This + includes basic Python data types such as integers and strings. In + addition to primitive types, we currently support PyArrow, nanoarrow, + and arro3 objects. Other objects + that support the Arrow FFI standard will be given a "best attempt" at + conversion to scalar objects. + + + + .. py:method:: update(*values: pyarrow.Array) -> None + :abstractmethod: + + + Evaluate an array of values and update state. + + + +.. py:class:: AggregateUDF(name: str, accumulator: collections.abc.Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str) + AggregateUDF(name: str, accumulator: AggregateUDFExportable, input_types: None = ..., return_type: None = ..., state_type: None = ..., volatility: None = ...) + + Class for performing aggregate user-defined functions (UDAF). + + Aggregate UDFs operate on a group of rows and return a single value. See + also :py:class:`ScalarUDF` for operating on a row by row basis. 
+ + Instantiate a user-defined aggregate function (UDAF). + + See :py:func:`udaf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDAF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Aggregate UDF. + + + + .. py:method:: from_pycapsule(func: AggregateUDFExportable | _typeshed.CapsuleType) -> AggregateUDF + :staticmethod: + + + Create an Aggregate UDF from an AggregateUDF PyCapsule object. + + This function will instantiate an Aggregate UDF that uses a DataFusion + AggregateUDF that is exported via the FFI bindings. + + + + .. py:method:: udaf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, AggregateUDF] + udaf(accum: collections.abc.Callable[[], Accumulator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) -> AggregateUDF + udaf(accum: AggregateUDFExportable) -> AggregateUDF + udaf(accum: _typeshed.CapsuleType) -> AggregateUDF + :staticmethod: + + + Create a new User-Defined Aggregate Function (UDAF). + + This class allows you to define an aggregate function that can be used in + data aggregation or window function calls. + + Usage: + - As a function: ``udaf(accum, input_types, return_type, state_type, volatility, name)``. + - As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``. + When using ``udaf`` as a decorator, do not pass ``accum`` explicitly. + + Function example: + + If your :py:class:`Accumulator` can be instantiated with no arguments, you + can simply pass its type as ``accum``. 
If you need to pass additional + arguments to its constructor, you can define a lambda or a factory method. + During runtime the :py:class:`Accumulator` will be constructed for every + instance in which this UDAF is used. The following examples are all valid:: + + import pyarrow as pa + import pyarrow.compute as pc + + class Summarize(Accumulator): + def __init__(self, bias: float = 0.0): + self._sum = pa.scalar(bias) + + def state(self) -> list[pa.Scalar]: + return [self._sum] + + def update(self, values: pa.Array) -> None: + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) + + def merge(self, states: list[pa.Array]) -> None: + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) + + def evaluate(self) -> pa.Scalar: + return self._sum + + def sum_bias_10() -> Summarize: + return Summarize(10.0) + + udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), + [pa.float64()], "immutable") + + Decorator example:: + + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def udf4() -> Summarize: + return Summarize(10.0) + + :param accum: The accumulator python function. Only needed when calling as a + function. Skip this argument when using ``udaf`` as a decorator. + If you have a Rust backed AggregateUDF within a PyCapsule, you can + pass this parameter and ignore the rest. They will be determined + directly from the underlying function. See the online documentation + for more information. + :param input_types: The data types of the arguments to ``accum``. + :param return_type: The data type of the return value. + :param state_type: The data types of the intermediate accumulation. + :param volatility: See :py:class:`Volatility` for allowed values. + :param name: A descriptive name for the function. 
+ + :returns: A user-defined aggregate function, which can be used in either data + aggregation or window function calls. + + + + .. py:attribute:: _udaf + + +.. py:class:: Catalog(catalog: datafusion._internal.catalog.RawCatalog) + + DataFusion data catalog. + + This constructor is not typically called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the catalog. + + + + .. py:method:: database(name: str = 'public') -> Schema + + Returns the database with the given ``name`` from this catalog. + + + + .. py:method:: deregister_schema(name: str, cascade: bool = True) -> Schema | None + + Deregister a schema from this catalog. + + + + .. py:method:: memory_catalog(ctx: datafusion.SessionContext | None = None) -> Catalog + :staticmethod: + + + Create an in-memory catalog provider. + + + + .. py:method:: names() -> set[str] + + This is an alias for `schema_names`. + + + + .. py:method:: register_schema(name: str, schema: Schema | SchemaProvider | SchemaProviderExportable) -> Schema | None + + Register a schema with this catalog. + + + + .. py:method:: schema(name: str = 'public') -> Schema + + Returns the database with the given ``name`` from this catalog. + + + + .. py:method:: schema_names() -> set[str] + + Returns the list of schemas in this catalog. + + + + .. py:attribute:: catalog + + +.. py:class:: CsvReadOptions(*, has_header: bool = True, delimiter: str = ',', quote: str = '"', terminator: str | None = None, escape: str | None = None, comment: str | None = None, newlines_in_values: bool = False, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, pyarrow.DataType]] | None = None, file_compression_type: str = '', file_sort_order: list[list[datafusion.expr.SortExpr]] | None = None, null_regex: str | None = None, truncated_rows: bool = False) + + Options for reading CSV files. 
+ + This class provides a builder pattern for configuring CSV reading options. + All methods starting with ``with_`` return ``self`` to allow method chaining. + + Initialize CsvReadOptions. + + :param has_header: Does the CSV file have a header row? If schema inference + is run on a file with no headers, default column names are created. + :param delimiter: Column delimiter character. Must be a single ASCII character. + :param quote: Quote character for fields containing delimiters or newlines. + Must be a single ASCII character. + :param terminator: Optional line terminator character. If ``None``, uses CRLF. + Must be a single ASCII character. + :param escape: Optional escape character for quotes. Must be a single ASCII + character. + :param comment: If specified, lines beginning with this character are ignored. + Must be a single ASCII character. + :param newlines_in_values: Whether newlines in quoted values are supported. + Parsing newlines in quoted values may be affected by execution + behavior such as parallel file scanning. Setting this to ``True`` + ensures that newlines in values are parsed successfully, which may + reduce performance. + :param schema: Optional PyArrow schema representing the CSV files. If ``None``, + the CSV reader will try to infer it based on data in the file. + :param schema_infer_max_records: Maximum number of rows to read from CSV files + for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns as a list of tuples of + (column_name, data_type). + :param file_compression_type: File compression type. Supported values are + ``"gzip"``, ``"bz2"``, ``"xz"``, ``"zstd"``, or empty string for + uncompressed. + :param file_sort_order: Optional sort order of the files as a list of sort + expressions per file. + :param null_regex: Optional regex pattern to match null values in the CSV. 
+ :param truncated_rows: Whether to allow truncated rows when parsing. By default + this is ``False`` and will error if the CSV rows have different + lengths. When set to ``True``, it will allow records with less than + the expected number of columns and fill the missing columns with + nulls. If the record's schema is not nullable, it will still return + an error. + + + .. py:method:: to_inner() -> datafusion._internal.options.CsvReadOptions + + Convert this object into the underlying Rust structure. + + This is intended for internal use only. + + + + .. py:method:: with_comment(comment: str | None) -> CsvReadOptions + + Configure the comment character. + + + + .. py:method:: with_delimiter(delimiter: str) -> CsvReadOptions + + Configure the column delimiter. + + + + .. py:method:: with_escape(escape: str | None) -> CsvReadOptions + + Configure the escape character. + + + + .. py:method:: with_file_compression_type(file_compression_type: str) -> CsvReadOptions + + Configure file compression type. + + + + .. py:method:: with_file_extension(file_extension: str) -> CsvReadOptions + + Configure the file extension filter. + + + + .. py:method:: with_file_sort_order(file_sort_order: list[list[datafusion.expr.SortExpr]]) -> CsvReadOptions + + Configure file sort order. + + + + .. py:method:: with_has_header(has_header: bool) -> CsvReadOptions + + Configure whether the CSV has a header row. + + + + .. py:method:: with_newlines_in_values(newlines_in_values: bool) -> CsvReadOptions + + Configure whether newlines in values are supported. + + + + .. py:method:: with_null_regex(null_regex: str | None) -> CsvReadOptions + + Configure null value regex pattern. + + + + .. py:method:: with_quote(quote: str) -> CsvReadOptions + + Configure the quote character. + + + + .. py:method:: with_schema(schema: pyarrow.Schema | None) -> CsvReadOptions + + Configure the schema. + + + + .. 
py:method:: with_schema_infer_max_records(schema_infer_max_records: int) -> CsvReadOptions + + Configure maximum records for schema inference. + + + + .. py:method:: with_table_partition_cols(table_partition_cols: list[tuple[str, pyarrow.DataType]]) -> CsvReadOptions + + Configure table partition columns. + + + + .. py:method:: with_terminator(terminator: str | None) -> CsvReadOptions + + Configure the line terminator character. + + + + .. py:method:: with_truncated_rows(truncated_rows: bool) -> CsvReadOptions + + Configure whether to allow truncated rows. + + + + .. py:attribute:: comment + :value: None + + + + .. py:attribute:: delimiter + :value: ',' + + + + .. py:attribute:: escape + :value: None + + + + .. py:attribute:: file_compression_type + :value: '' + + + + .. py:attribute:: file_extension + :value: '.csv' + + + + .. py:attribute:: file_sort_order + :value: [] + + + + .. py:attribute:: has_header + :value: True + + + + .. py:attribute:: newlines_in_values + :value: False + + + + .. py:attribute:: null_regex + :value: None + + + + .. py:attribute:: quote + :value: '"' + + + + .. py:attribute:: schema + :value: None + + + + .. py:attribute:: schema_infer_max_records + :value: 1000 + + + + .. py:attribute:: table_partition_cols + :value: [] + + + + .. py:attribute:: terminator + :value: None + + + + .. py:attribute:: truncated_rows + :value: False + + + +.. py:class:: DataFrameWriteOptions(insert_operation: InsertOp | None = None, single_file_output: bool = False, partition_by: str | collections.abc.Sequence[str] | None = None, sort_by: datafusion.expr.Expr | datafusion.expr.SortExpr | collections.abc.Sequence[datafusion.expr.Expr] | collections.abc.Sequence[datafusion.expr.SortExpr] | None = None) + + Writer options for DataFrame. + + There is no guarantee the table provider supports all writer options. + See the individual implementation and documentation for details. + + Instantiate writer options for DataFrame. + + + .. 
py:attribute:: _raw_write_options + + +.. py:class:: Database(schema: datafusion._internal.catalog.RawSchema) + + Bases: :py:obj:`Schema` + + + See `Schema`. + + This constructor is not typically called by the end user. + + +.. py:class:: ExecutionPlan(plan: datafusion._internal.ExecutionPlan) + + Represent nodes in the DataFusion Physical Plan. + + This constructor should not be called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the physical plan. + + + + .. py:method:: children() -> list[ExecutionPlan] + + Get a list of children `ExecutionPlan` that act as inputs to this plan. + + The returned list will be empty for leaf nodes such as scans, will contain a + single value for unary nodes, or two values for binary nodes (such as joins). + + + + .. py:method:: display() -> str + + Print the physical plan. + + + + .. py:method:: display_indent() -> str + + Print an indented form of the physical plan. + + + + .. py:method:: from_proto(ctx: datafusion.context.SessionContext, data: bytes) -> ExecutionPlan + :staticmethod: + + + Create an ExecutionPlan from protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:method:: to_proto() -> bytes + + Convert an ExecutionPlan into protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:attribute:: _raw_plan + + + .. py:property:: partition_count + :type: int + + + Returns the number of partitions in the physical plan. + + +.. py:class:: Expr(expr: datafusion._internal.expr.RawExpr) + + Expression object. + + Expressions are one of the core concepts in DataFusion. See + :ref:`Expressions` in the online documentation for more information. + + This constructor should not be called by the end user. + + + .. py:method:: __add__(rhs: Any) -> Expr + + Addition operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. 
py:method:: __and__(rhs: Expr) -> Expr + + Logical AND. + + + + .. py:method:: __eq__(rhs: object) -> Expr + + Equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __ge__(rhs: Any) -> Expr + + Greater than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __getitem__(key: str | int) -> Expr + + Retrieve sub-object. + + If ``key`` is a string, returns the subfield of the struct. + If ``key`` is an integer, retrieves the element in the array. Note that the + element index begins at ``0``, unlike + :py:func:`~datafusion.functions.array_element` which begins at ``1``. + If ``key`` is a slice, returns an array that contains a slice of the + original array. Similar to integer indexing, this follows Python convention + where the index begins at ``0`` unlike + :py:func:`~datafusion.functions.array_slice` which begins at ``1``. + + + + .. py:method:: __gt__(rhs: Any) -> Expr + + Greater than. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __invert__() -> Expr + + Binary not (~). + + + + .. py:method:: __le__(rhs: Any) -> Expr + + Less than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __lt__(rhs: Any) -> Expr + + Less than. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __mod__(rhs: Any) -> Expr + + Modulo operator (%). + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __mul__(rhs: Any) -> Expr + + Multiplication operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __ne__(rhs: object) -> Expr + + Not equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __or__(rhs: Expr) -> Expr + + Logical OR. + + + + .. 
py:method:: __repr__() -> str + + Generate a string representation of this expression. + + + + .. py:method:: __richcmp__(other: Expr, op: int) -> Expr + + Comparison operator. + + + + .. py:method:: __sub__(rhs: Any) -> Expr + + Subtraction operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: __truediv__(rhs: Any) -> Expr + + Division operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + + + + .. py:method:: abs() -> Expr + + Return the absolute value of a given number. + + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + + + + .. py:method:: acos() -> Expr + + Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. + + + + .. py:method:: acosh() -> Expr + + Returns inverse hyperbolic cosine. + + + + .. py:method:: alias(name: str, metadata: dict[str, str] | None = None) -> Expr + + Assign a name to the expression. + + :param name: The name to assign to the expression. + :param metadata: Optional metadata to attach to the expression. + + :returns: A new expression with the assigned name. + + + + .. py:method:: array_dims() -> Expr + + Returns an array of the array's dimensions. + + + + .. py:method:: array_distinct() -> Expr + + Returns distinct values from the array after removing duplicates. + + + + .. py:method:: array_empty() -> Expr + + Returns a boolean indicating whether the array is empty. + + + + .. py:method:: array_length() -> Expr + + Returns the length of the array. + + + + .. py:method:: array_ndims() -> Expr + + Returns the number of dimensions of the array. + + + + .. py:method:: array_pop_back() -> Expr + + Returns the array without the last element. + + + + .. py:method:: array_pop_front() -> Expr + + Returns the array without the first element. + + + + .. 
py:method:: arrow_typeof() -> Expr + + Returns the Arrow type of the expression. + + + + .. py:method:: ascii() -> Expr + + Returns the numeric code of the first character of the argument. + + + + .. py:method:: asin() -> Expr + + Returns the arc sine or inverse sine of a number. + + + + .. py:method:: asinh() -> Expr + + Returns inverse hyperbolic sine. + + + + .. py:method:: atan() -> Expr + + Returns inverse tangent of a number. + + + + .. py:method:: atanh() -> Expr + + Returns inverse hyperbolic tangent. + + + + .. py:method:: between(low: Any, high: Any, negated: bool = False) -> Expr + + Returns ``True`` if this expression is between a given range. + + :param low: lower bound of the range (inclusive). + :param high: higher bound of the range (inclusive). + :param negated: negates whether the expression is between a given range + + + + .. py:method:: bit_length() -> Expr + + Returns the number of bits in the string argument. + + + + .. py:method:: btrim() -> Expr + + Removes all characters, spaces by default, from both sides of a string. + + + + .. py:method:: canonical_name() -> str + + Returns a complete string representation of this expression. + + + + .. py:method:: cardinality() -> Expr + + Returns the total number of elements in the array. + + + + .. py:method:: cast(to: pyarrow.DataType[Any] | type) -> Expr + + Cast to a new data type. + + + + .. py:method:: cbrt() -> Expr + + Returns the cube root of a number. + + + + .. py:method:: ceil() -> Expr + + Returns the nearest integer greater than or equal to argument. + + + + .. py:method:: char_length() -> Expr + + The number of characters in the ``string``. + + + + .. py:method:: character_length() -> Expr + + Returns the number of characters in the argument. + + + + .. py:method:: chr() -> Expr + + Converts the Unicode code point to a UTF8 character. + + + + .. py:method:: column(value: str) -> Expr + :staticmethod: + + + Creates a new expression representing a column. + + + + .. 
py:method:: column_name(plan: datafusion.plan.LogicalPlan) -> str + + Compute the output column name based on the provided logical plan. + + + + .. py:method:: cos() -> Expr + + Returns the cosine of the argument. + + + + .. py:method:: cosh() -> Expr + + Returns the hyperbolic cosine of the argument. + + + + .. py:method:: cot() -> Expr + + Returns the cotangent of the argument. + + + + .. py:method:: degrees() -> Expr + + Converts the argument from radians to degrees. + + + + .. py:method:: display_name() -> str + + Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + + + + .. py:method:: distinct() -> ExprFuncBuilder + + Only evaluate distinct values for an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: empty() -> Expr + + This is an alias for :py:func:`array_empty`. + + + + .. py:method:: exp() -> Expr + + Returns the exponential of the argument. + + + + .. py:method:: factorial() -> Expr + + Returns the factorial of the argument. + + + + .. py:method:: fill_nan(value: Any | Expr | None = None) -> Expr + + Fill NaN values with a provided value. + + + + .. py:method:: fill_null(value: Any | Expr | None = None) -> Expr + + Fill NULL values with a provided value. + + + + .. py:method:: filter(filter: Expr) -> ExprFuncBuilder + + Filter an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: flatten() -> Expr + + Flattens an array of arrays into a single array. + + + + .. 
py:method:: floor() -> Expr + + Returns the nearest integer less than or equal to the argument. + + + + .. py:method:: from_unixtime() -> Expr + + Converts an integer to RFC3339 timestamp format string. + + + + .. py:method:: initcap() -> Expr + + Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the + remaining characters to lowercase. + + + + .. py:method:: is_not_null() -> Expr + + Returns ``True`` if this expression is not null. + + + + .. py:method:: is_null() -> Expr + + Returns ``True`` if this expression is null. + + + + .. py:method:: isnan() -> Expr + + Returns true if a given number is +NaN or -NaN otherwise returns false. + + + + .. py:method:: iszero() -> Expr + + Returns true if a given number is +0.0 or -0.0 otherwise returns false. + + + + .. py:method:: length() -> Expr + + The number of characters in the ``string``. + + + + .. py:method:: list_dims() -> Expr + + Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. + + + + .. py:method:: list_distinct() -> Expr + + Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. + + + + .. py:method:: list_length() -> Expr + + Returns the length of the array. + + This is an alias for :py:func:`array_length`. + + + + .. py:method:: list_ndims() -> Expr + + Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. + + + + .. py:method:: literal(value: Any) -> Expr + :staticmethod: + + + Creates a new expression representing a scalar value. + + ``value`` must be a valid PyArrow scalar value or easily castable to one. + + + + .. py:method:: literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr + :staticmethod: + + + Creates a new expression representing a scalar value with metadata. + + :param value: A valid PyArrow scalar value or easily castable to one. 
+ :param metadata: Metadata to attach to the expression. + + + + .. py:method:: ln() -> Expr + + Returns the natural logarithm (base e) of the argument. + + + + .. py:method:: log10() -> Expr + + Base 10 logarithm of the argument. + + + + .. py:method:: log2() -> Expr + + Base 2 logarithm of the argument. + + + + .. py:method:: lower() -> Expr + + Converts a string to lowercase. + + + + .. py:method:: ltrim() -> Expr + + Removes all characters, spaces by default, from the beginning of a string. + + + + .. py:method:: md5() -> Expr + + Computes an MD5 128-bit checksum for a string expression. + + + + .. py:method:: null_treatment(null_treatment: datafusion.common.NullTreatment) -> ExprFuncBuilder + + Set the treatment for ``null`` values for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: octet_length() -> Expr + + Returns the number of bytes of a string. + + + + .. py:method:: order_by(*exprs: Expr | SortExpr) -> ExprFuncBuilder + + Set the ordering for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: over(window: Window) -> Expr + + Turn an aggregate function into a window function. + + This function turns any aggregate function into a window function. With the + exception of ``partition_by``, how each of the parameters is used is determined + by the underlying aggregate function. + + :param window: Window definition + + + + .. py:method:: partition_by(*partition_by: Expr) -> ExprFuncBuilder + + Set the partitioning for a window function. 
+ + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:method:: python_value() -> Any + + Extracts the Expr value into `Any`. + + This is only valid for literal expressions. + + :returns: Python object representing literal value of the expression. + + + + .. py:method:: radians() -> Expr + + Converts the argument from degrees to radians. + + + + .. py:method:: reverse() -> Expr + + Reverse the string argument. + + + + .. py:method:: rex_call_operands() -> list[Expr] + + Return the operands of the expression based on its variant type. + + Row expressions, Rex(s), operate on the concept of operands. Different + variants of Expressions, Expr(s), store those operands in different + data structures. This function examines the Expr variant and returns + the operands to the calling logic. + + + + .. py:method:: rex_call_operator() -> str + + Extracts the operator associated with a row expression type call. + + + + .. py:method:: rex_type() -> datafusion.common.RexType + + Return the Rex Type of this expression. + + A Rex (Row Expression) specifies a single row of data. That specification + could include user defined functions or types. RexType identifies the + row as one of the possible valid ``RexType``. + + + + .. py:method:: rtrim() -> Expr + + Removes all characters, spaces by default, from the end of a string. + + + + .. py:method:: schema_name() -> str + + Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + + + + .. py:method:: sha224() -> Expr + + Computes the SHA-224 hash of a binary string. + + + + .. py:method:: sha256() -> Expr + + Computes the SHA-256 hash of a binary string. + + + + .. py:method:: sha384() -> Expr + + Computes the SHA-384 hash of a binary string. + + + + ..
py:method:: sha512() -> Expr + + Computes the SHA-512 hash of a binary string. + + + + .. py:method:: signum() -> Expr + + Returns the sign of the argument (-1, 0, +1). + + + + .. py:method:: sin() -> Expr + + Returns the sine of the argument. + + + + .. py:method:: sinh() -> Expr + + Returns the hyperbolic sine of the argument. + + + + .. py:method:: sort(ascending: bool = True, nulls_first: bool = True) -> SortExpr + + Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. + + :param ascending: If true, sort in ascending order. + :param nulls_first: Return null values first. + + + + .. py:method:: sqrt() -> Expr + + Returns the square root of the argument. + + + + .. py:method:: string_literal(value: str) -> Expr + :staticmethod: + + + Creates a new expression representing a UTF8 literal value. + + It is different from `literal` because it is pa.string() instead of + pa.string_view() + + This is needed for cases where DataFusion is expecting a UTF8 instead of + UTF8View literal, like in: + https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 + + + + .. py:method:: tan() -> Expr + + Returns the tangent of the argument. + + + + .. py:method:: tanh() -> Expr + + Returns the hyperbolic tangent of the argument. + + + + .. py:method:: to_hex() -> Expr + + Converts an integer to a hexadecimal string. + + + + .. py:method:: to_variant() -> Any + + Convert this expression into a python object if possible. + + + + .. py:method:: trim() -> Expr + + Removes all characters, spaces by default, from both sides of a string. + + + + .. py:method:: types() -> datafusion.common.DataTypeMap + + Return the ``DataTypeMap``. + + :returns: DataTypeMap which represents the PythonType, Arrow DataType, and + SqlType Enum which this expression represents. + + + + .. py:method:: upper() -> Expr + + Converts a string to uppercase. + + + + .. 
py:method:: variant_name() -> str + + Returns the name of the Expr variant. + + Ex: ``IsNotNull``, ``Literal``, ``BinaryExpr``, etc. + + + + .. py:method:: window_frame(window_frame: WindowFrame) -> ExprFuncBuilder + + Set the frame for a window function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + + + + .. py:attribute:: __radd__ + + + .. py:attribute:: __rand__ + + + .. py:attribute:: __rmod__ + + + .. py:attribute:: __rmul__ + + + .. py:attribute:: __ror__ + + + .. py:attribute:: __rsub__ + + + .. py:attribute:: __rtruediv__ + + + .. py:attribute:: _to_pyarrow_types + :type: ClassVar[dict[type, pyarrow.DataType]] + + + .. py:attribute:: expr + + +.. py:class:: InsertOp + + Bases: :py:obj:`enum.Enum` + + + Insert operation mode. + + These modes are used by the table writing feature to define how record + batches should be written to a table. + + + .. py:attribute:: APPEND + + Appends new rows to the existing table without modifying any existing rows. + + + .. py:attribute:: OVERWRITE + + Overwrites all existing rows in the table with the new rows. + + + .. py:attribute:: REPLACE + + Replace existing rows that collide with the inserted rows. + + Replacement is typically based on a unique key or primary key. + + +.. py:class:: LogicalPlan(plan: datafusion._internal.LogicalPlan) + + Logical Plan. + + A `LogicalPlan` is a node in a tree of relational operators (such as + Projection or Filter). + + Represents transforming an input relation (table) to an output relation + (table) with a potentially different schema. Plans form a dataflow tree + where data flows from leaves up to the root to produce the query result. + + A `LogicalPlan` can be created by the SQL query planner, the DataFrame API, + or programmatically (for example custom query languages).
+ + This constructor should not be called by the end user. + + + .. py:method:: __eq__(other: LogicalPlan) -> bool + + Test equality. + + + + .. py:method:: __repr__() -> str + + Generate a printable representation of the plan. + + + + .. py:method:: display() -> str + + Print the logical plan. + + + + .. py:method:: display_graphviz() -> str + + Print the graph visualization of the logical plan. + + Returns a formattable structure that produces lines meant for graphical display + using the `DOT` language. This format can be visualized using software from + `graphviz <https://graphviz.org/>`_. + + + + .. py:method:: display_indent() -> str + + Print an indented form of the logical plan. + + + + .. py:method:: display_indent_schema() -> str + + Print an indented form of the schema for the logical plan. + + + + .. py:method:: from_proto(ctx: datafusion.context.SessionContext, data: bytes) -> LogicalPlan + :staticmethod: + + + Create a LogicalPlan from protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:method:: inputs() -> list[LogicalPlan] + + Returns the list of inputs to the logical plan. + + + + .. py:method:: to_proto() -> bytes + + Convert a LogicalPlan to protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:method:: to_variant() -> Any + + Convert the logical plan into its specific variant. + + + + .. py:attribute:: _raw_plan + + +.. py:class:: ParquetColumnOptions(encoding: str | None = None, dictionary_enabled: bool | None = None, compression: str | None = None, statistics_enabled: str | None = None, bloom_filter_enabled: bool | None = None, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None) + + Parquet options for individual columns. + + Contains the available options that can be applied for an individual Parquet column, + replacing the global options in ``ParquetWriterOptions``. + + Initialize the ParquetColumnOptions.
+ + :param encoding: Sets encoding for the column path. Valid values are: ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. These values are not case-sensitive. If + ``None``, uses the default parquet options. + :param dictionary_enabled: Sets if dictionary encoding is enabled for the column + path. If ``None``, uses the default parquet options. + :param compression: Sets default parquet compression codec for the column path. + Valid values are ``uncompressed``, ``snappy``, ``gzip(level)``, ``lzo``, + ``brotli(level)``, ``lz4``, ``zstd(level)``, and ``lz4_raw``. These + values are not case-sensitive. If ``None``, uses the default parquet + options. + :param statistics_enabled: Sets if statistics are enabled for the column. Valid + values are: ``none``, ``chunk``, and ``page``. These values are not + case-sensitive. If ``None``, uses the default parquet options. + :param bloom_filter_enabled: Sets if bloom filter is enabled for the column path. + If ``None``, uses the default parquet options. + :param bloom_filter_fpp: Sets bloom filter false positive probability for the + column path. If ``None``, uses the default parquet options. + :param bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet options. + + + .. py:attribute:: bloom_filter_enabled + :value: None + + + + .. py:attribute:: bloom_filter_fpp + :value: None + + + + .. py:attribute:: bloom_filter_ndv + :value: None + + + + .. py:attribute:: compression + :value: None + + + + .. py:attribute:: dictionary_enabled + :value: None + + + + .. py:attribute:: encoding + :value: None + + + + .. py:attribute:: statistics_enabled + :value: None + + + +..
py:class:: ParquetWriterOptions(data_pagesize_limit: int = 1024 * 1024, write_batch_size: int = 1024, writer_version: str = '1.0', skip_arrow_metadata: bool = False, compression: str | None = 'zstd(3)', compression_level: int | None = None, dictionary_enabled: bool | None = True, dictionary_page_size_limit: int = 1024 * 1024, statistics_enabled: str | None = 'page', max_row_group_size: int = 1024 * 1024, created_by: str = 'datafusion-python', column_index_truncate_length: int | None = 64, statistics_truncate_length: int | None = None, data_page_row_count_limit: int = 20000, encoding: str | None = None, bloom_filter_on_write: bool = False, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None, allow_single_file_parallelism: bool = True, maximum_parallel_row_group_writers: int = 1, maximum_buffered_record_batches_per_stream: int = 2, column_specific_options: dict[str, ParquetColumnOptions] | None = None) + + Advanced parquet writer options. + + Allows setting the writer options that apply to the entire file. Some options can + also be set on a column by column basis, with the field ``column_specific_options`` + (see ``ParquetColumnOptions``). + + Initialize the ParquetWriterOptions. + + :param data_pagesize_limit: Sets best effort maximum size of data page in bytes. + :param write_batch_size: Sets write_batch_size in bytes. + :param writer_version: Sets parquet writer version. Valid values are ``1.0`` and + ``2.0``. + :param skip_arrow_metadata: Skip encoding the embedded arrow metadata in the + KV_meta. + :param compression: Compression type to use. Default is ``zstd(3)``. + Available compression types are: + + - ``uncompressed``: No compression. + - ``snappy``: Snappy compression. + - ``gzip(n)``: Gzip compression with level n. + - ``brotli(n)``: Brotli compression with level n. + - ``lz4``: LZ4 compression. + - ``lz4_raw``: LZ4_RAW compression. + - ``zstd(n)``: Zstandard compression with level n.
+ :param compression_level: Compression level to set. + :param dictionary_enabled: Sets if dictionary encoding is enabled. If ``None``, + uses the default parquet writer setting. + :param dictionary_page_size_limit: Sets best effort maximum dictionary page size, + in bytes. + :param statistics_enabled: Sets if statistics are enabled for any column. Valid + values are ``none``, ``chunk``, and ``page``. If ``None``, uses the + default parquet writer setting. + :param max_row_group_size: Target maximum number of rows in each row group + (defaults to 1M rows). Writing larger row groups requires more memory + to write, but can get better compression and be faster to read. + :param created_by: Sets "created by" property. + :param column_index_truncate_length: Sets column index truncate length. + :param statistics_truncate_length: Sets statistics truncate length. If ``None``, + uses the default parquet writer setting. + :param data_page_row_count_limit: Sets best effort maximum number of rows in a data + page. + :param encoding: Sets default encoding for any column. Valid values are ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. If ``None``, uses the default parquet writer + setting. + :param bloom_filter_on_write: Write bloom filters for all columns when creating + parquet files. + :param bloom_filter_fpp: Sets bloom filter false positive probability. If ``None``, + uses the default parquet writer setting. + :param bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet writer setting. + :param allow_single_file_parallelism: Controls whether DataFusion will attempt to + speed up writing parquet files by serializing them in parallel.
Each + column in each row group in each output file are serialized in parallel + leveraging a maximum possible core count of + ``n_files * n_row_groups * n_columns``. + :param maximum_parallel_row_group_writers: By default parallel parquet writer is + tuned for minimum memory usage in a streaming execution plan. You may + see a performance benefit when writing large parquet files by increasing + ``maximum_parallel_row_group_writers`` and + ``maximum_buffered_record_batches_per_stream`` if your system has idle + cores and can tolerate additional memory usage. Boosting these values is + likely worthwhile when writing out already in-memory data, such as from + a cached data frame. + :param maximum_buffered_record_batches_per_stream: See + ``maximum_parallel_row_group_writers``. + :param column_specific_options: Overrides options for specific columns. If a column + is not a part of this dictionary, it will use the parameters provided + here. + + + .. py:attribute:: allow_single_file_parallelism + :value: True + + + + .. py:attribute:: bloom_filter_fpp + :value: None + + + + .. py:attribute:: bloom_filter_ndv + :value: None + + + + .. py:attribute:: bloom_filter_on_write + :value: False + + + + .. py:attribute:: column_index_truncate_length + :value: 64 + + + + .. py:attribute:: column_specific_options + :value: None + + + + .. py:attribute:: created_by + :value: 'datafusion-python' + + + + .. py:attribute:: data_page_row_count_limit + :value: 20000 + + + + .. py:attribute:: data_pagesize_limit + :value: 1048576 + + + + .. py:attribute:: dictionary_enabled + :value: True + + + + .. py:attribute:: dictionary_page_size_limit + :value: 1048576 + + + + .. py:attribute:: encoding + :value: None + + + + .. py:attribute:: max_row_group_size + :value: 1048576 + + + + .. py:attribute:: maximum_buffered_record_batches_per_stream + :value: 2 + + + + .. py:attribute:: maximum_parallel_row_group_writers + :value: 1 + + + + .. 
py:attribute:: skip_arrow_metadata + :value: False + + + + .. py:attribute:: statistics_enabled + :value: 'page' + + + + .. py:attribute:: statistics_truncate_length + :value: None + + + + .. py:attribute:: write_batch_size + :value: 1024 + + + + .. py:attribute:: writer_version + :value: '1.0' + + + +.. py:class:: RecordBatch(record_batch: datafusion._internal.RecordBatch) + + This class is essentially a wrapper for :py:class:`pa.RecordBatch`. + + This constructor is generally not called by the end user. + + See the :py:class:`RecordBatchStream` iterator for generating this class. + + + .. py:method:: __arrow_c_array__(requested_schema: object | None = None) -> tuple[object, object] + + Export the record batch via the Arrow C Data Interface. + + This allows zero-copy interchange with libraries that support the + `Arrow PyCapsule interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_. + + :param requested_schema: Attempt to provide the record batch using this + schema. Only straightforward projections such as column + selection or reordering are applied. + + :returns: Two Arrow PyCapsule objects representing the ``ArrowArray`` and + ``ArrowSchema``. + + + + .. py:method:: to_pyarrow() -> pyarrow.RecordBatch + + Convert to :py:class:`pa.RecordBatch`. + + + + .. py:attribute:: record_batch + + +.. py:class:: RecordBatchStream(record_batch_stream: datafusion._internal.RecordBatchStream) + + This class represents a stream of record batches. + + These are typically the result of a + :py:func:`~datafusion.dataframe.DataFrame.execute_stream` operation. + + This constructor is typically not called by the end user. + + + .. py:method:: __aiter__() -> typing_extensions.Self + + Return an asynchronous iterator over record batches. + + + + .. py:method:: __anext__() -> RecordBatch + :async: + + + Return the next :py:class:`RecordBatch` in the stream asynchronously. + + + + .. py:method:: __iter__() -> typing_extensions.Self + + Return an iterator over record batches. + + + + ..
py:method:: __next__() -> RecordBatch + + Return the next :py:class:`RecordBatch` in the stream. + + + + .. py:method:: next() -> RecordBatch + + See :py:func:`__next__` for the iterator function. + + + + .. py:attribute:: rbs + + +.. py:class:: RuntimeEnvBuilder + + Runtime configuration options. + + Create a new :py:class:`RuntimeEnvBuilder` with default values. + + + .. py:method:: with_disk_manager_disabled() -> RuntimeEnvBuilder + + Disable the disk manager; attempts to create temporary files will error. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_disk_manager_os() -> RuntimeEnvBuilder + + Use the operating system's temporary directory for disk manager. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_disk_manager_specified(*paths: str | pathlib.Path) -> RuntimeEnvBuilder + + Use the specified paths for the disk manager's temporary files. + + :param paths: Paths to use for the disk manager's temporary files. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:method:: with_fair_spill_pool(size: int) -> RuntimeEnvBuilder + + Use a fair spill pool with the specified size. + + This pool works best when you know beforehand the query has multiple spillable + operators that will likely all need to spill. Sometimes it will cause spills + even when there was sufficient memory (reserved for other operators) to avoid + doing so:: + + ┌───────────────────────z──────────────────────z───────────────┐ + │ z z │ + │ z z │ + │ Spillable z Unspillable z Free │ + │ Memory z Memory z Memory │ + │ z z │ + │ z z │ + └───────────────────────z──────────────────────z───────────────┘ + + :param size: Size of the memory pool in bytes. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Example usage:: + + config = RuntimeEnvBuilder().with_fair_spill_pool(1024) + + + + ..
py:method:: with_greedy_memory_pool(size: int) -> RuntimeEnvBuilder + + Use a greedy memory pool with the specified size. + + This pool works well for queries that do not need to spill or have a single + spillable operator. See :py:func:`with_fair_spill_pool` if there are + multiple spillable operators that all will spill. + + :param size: Size of the memory pool in bytes. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Example usage:: + + config = RuntimeEnvBuilder().with_greedy_memory_pool(1024) + + + + .. py:method:: with_temp_file_path(path: str | pathlib.Path) -> RuntimeEnvBuilder + + Use the specified path to create any needed temporary files. + + :param path: Path to use for temporary files. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + Example usage:: + + config = RuntimeEnvBuilder().with_temp_file_path("/tmp") + + + + .. py:method:: with_unbounded_memory_pool() -> RuntimeEnvBuilder + + Use an unbounded memory pool. + + :returns: A new :py:class:`RuntimeEnvBuilder` object with the updated setting. + + + + .. py:attribute:: config_internal + + +.. py:class:: SQLOptions + + Options to be used when performing SQL queries. + + Create a new :py:class:`SQLOptions` with default values. + + The default values are: + - DDL commands are allowed + - DML commands are allowed + - Statements are allowed + + + .. py:method:: with_allow_ddl(allow: bool = True) -> SQLOptions + + Should DDL (Data Definition Language) commands be run? + + Examples of DDL commands include ``CREATE TABLE`` and ``DROP TABLE``. + + :param allow: Allow DDL commands to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_ddl(True) + + + + .. py:method:: with_allow_dml(allow: bool = True) -> SQLOptions + + Should DML (Data Manipulation Language) commands be run? + + Examples of DML commands include ``INSERT INTO`` and ``DELETE``. 
+ + :param allow: Allow DML commands to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_dml(True) + + + + .. py:method:: with_allow_statements(allow: bool = True) -> SQLOptions + + Should statements such as ``SET VARIABLE`` and ``BEGIN TRANSACTION`` be run? + + :param allow: Allow statements to be run. + + :returns: A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_statements(True) + + + + .. py:attribute:: options_internal + + +.. py:class:: ScalarUDF(name: str, func: collections.abc.Callable[Ellipsis, _R], input_fields: list[pyarrow.Field], return_field: _R, volatility: Volatility | str) + + Class for performing scalar user-defined functions (UDF). + + Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for + operating on a group of rows. + + Instantiate a scalar user-defined function (UDF). + + See helper method :py:func:`udf` for argument details. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Scalar UDF. + + + + .. py:method:: from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF + :staticmethod: + + + Create a Scalar UDF from ScalarUDF PyCapsule object. + + This function will instantiate a Scalar UDF that uses a DataFusion + ScalarUDF that is exported via the FFI bindings. + + + + ..
py:method:: udf(input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, ScalarUDF] + udf(func: collections.abc.Callable[Ellipsis, _R], input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) -> ScalarUDF + udf(func: ScalarUDFExportable) -> ScalarUDF + :staticmethod: + + + Create a new User-Defined Function (UDF). + + This method can be used as either a function or a decorator. + + Usage: + - As a function: ``udf(func, input_fields, return_field, volatility, name)``. + - As a decorator: ``@udf(input_fields, return_field, volatility, name)``. + When used as a decorator, do **not** pass ``func`` explicitly. + + In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. + When you do so, it will be assumed that the nullability of the inputs and + output are True and that they have no metadata. + + :param func: Only needed when calling as a function. + Skip this argument when using ``udf`` as a decorator. If you have a Rust + backed ScalarUDF within a PyCapsule, you can pass this parameter + and ignore the rest. They will be determined directly from the + underlying function. See the online documentation for more information. + :type func: Callable, optional + :param input_fields: The data types or Fields + of the arguments to ``func``. This list must be of the same length + as the number of arguments. + :type input_fields: list[pa.Field | pa.DataType] + :param return_field: The field of the return value from the function. + :type return_field: _R + :param volatility: See :py:class:`Volatility` for allowed values. + :type volatility: Volatility | str + :param name: A descriptive name for the function.
+ :type name: Optional[str] + + :returns: A user-defined function that can be used in SQL expressions, + data aggregation, or window function calls. + + Example: Using ``udf`` as a function:: + + def double_func(x): + return x * 2 + double_udf = udf(double_func, [pa.int32()], pa.int32(), + "volatile", "double_it") + + Example: Using ``udf`` as a decorator:: + + @udf([pa.int32()], pa.int32(), "volatile", "double_it") + def double_udf(x): + return x * 2 + + + + .. py:attribute:: _udf + + +.. py:class:: SessionConfig(config_options: dict[str, str] | None = None) + + Session configuration options. + + Create a new :py:class:`SessionConfig` with the given configuration options. + + :param config_options: Configuration options. + + + .. py:method:: set(key: str, value: str) -> SessionConfig + + Set a configuration option. + + :param key: Option key. + :param value: Option value. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_batch_size(batch_size: int) -> SessionConfig + + Customize batch size. + + :param batch_size: Batch size. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_create_default_catalog_and_schema(enabled: bool = True) -> SessionConfig + + Control if the default catalog and schema will be automatically created. + + :param enabled: Whether the default catalog and schema will be + automatically created. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_default_catalog_and_schema(catalog: str, schema: str) -> SessionConfig + + Select a name for the default catalog and schema. + + :param catalog: Catalog name. + :param schema: Schema name. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_information_schema(enabled: bool = True) -> SessionConfig + + Enable or disable the inclusion of ``information_schema`` virtual tables.
+ + :param enabled: Whether to include ``information_schema`` virtual tables. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_parquet_pruning(enabled: bool = True) -> SessionConfig + + Enable or disable the use of pruning predicate for parquet readers. + + Pruning predicates will enable the reader to skip row groups. + + :param enabled: Whether to use pruning predicate for parquet readers. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_aggregations(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for aggregations. + + Enabling this improves parallelism. + + :param enabled: Whether to use repartitioning for aggregations. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_file_min_size(size: int) -> SessionConfig + + Set minimum file range size for repartitioning scans. + + :param size: Minimum file range size. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_file_scans(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for file scans. + + :param enabled: Whether to use repartitioning for file scans. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_joins(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for joins to improve parallelism. + + :param enabled: Whether to use repartitioning for joins. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_sorts(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for sorts. + + This may improve parallelism.
+ + :param enabled: Whether to use repartitioning for sorts. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_repartition_windows(enabled: bool = True) -> SessionConfig + + Enable or disable the use of repartitioning for window functions. + + This may improve parallelism. + + :param enabled: Whether to use repartitioning for window functions. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:method:: with_target_partitions(target_partitions: int) -> SessionConfig + + Customize the number of target partitions for query execution. + + Increasing partitions can increase concurrency. + + :param target_partitions: Number of target partitions. + + :returns: A new :py:class:`SessionConfig` object with the updated setting. + + + + .. py:attribute:: config_internal + + +.. py:class:: Table(table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset, ctx: datafusion.SessionContext | None = None) + + A DataFusion table. + + Internally we currently support the following types of tables: + + - Tables created using built-in DataFusion methods, such as + reading from CSV or Parquet + - pyarrow datasets + - DataFusion DataFrames, which will be converted into a view + - Externally provided tables implemented with the FFI PyCapsule + interface (advanced) + + Constructor. + + + .. py:method:: __repr__() -> str + + Print a string representation of the table. + + + + .. py:method:: from_dataset(dataset: pyarrow.dataset.Dataset) -> Table + :staticmethod: + + + Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`. + + + + .. py:attribute:: __slots__ + :value: ('_inner',) + + + + .. py:attribute:: _inner + + + .. py:property:: kind + :type: str + + + Returns the kind of table. + + + .. py:property:: schema + :type: pyarrow.Schema + + + Returns the schema associated with this table. + + +..
py:class:: TableFunction(name: str, func: collections.abc.Callable[[], any], ctx: datafusion.SessionContext | None = None) + + Class for performing user-defined table functions (UDTF). + + Table functions generate new table providers based on the + input expressions. + + Instantiate a user-defined table function (UDTF). + + See :py:func:`udtf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> Any + + Execute the UDTF and return a table provider. + + + + .. py:method:: __repr__() -> str + + User printable representation. + + + + .. py:method:: _create_table_udf(func: collections.abc.Callable[Ellipsis, Any], name: str) -> TableFunction + :staticmethod: + + + Create a TableFunction instance from function arguments. + + + + .. py:method:: _create_table_udf_decorator(name: str | None = None) -> collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]] + :staticmethod: + + + Create a decorator for a TableFunction. + + + + .. py:method:: udtf(name: str) -> collections.abc.Callable[Ellipsis, Any] + udtf(func: collections.abc.Callable[[], Any], name: str) -> TableFunction + :staticmethod: + + + Create a new User-Defined Table Function (UDTF). + + + + .. py:attribute:: _udtf + + +.. py:class:: WindowFrame(units: str, start_bound: Any | None, end_bound: Any | None) + + Defines a window frame for performing window operations. + + Construct a window frame using the given parameters. + + :param units: Should be one of ``rows``, ``range``, or ``groups``. + :param start_bound: Sets the preceding bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + :param end_bound: Sets the following bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + + + ..
py:method:: __repr__() -> str + + Print a string representation of the window frame. + + + + .. py:method:: get_frame_units() -> str + + Returns the window frame units for the bounds. + + + + .. py:method:: get_lower_bound() -> WindowFrameBound + + Returns starting bound. + + + + .. py:method:: get_upper_bound() -> WindowFrameBound + + Returns end bound. + + + + .. py:attribute:: window_frame + + +.. py:class:: WindowUDF(name: str, func: collections.abc.Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str) + + Class for performing window user-defined functions (UDF). + + Window UDFs operate on a partition of rows. See + also :py:class:`ScalarUDF` for operating on a row by row basis. + + Instantiate a user-defined window function (UDWF). + + See :py:func:`udwf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDWF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Window UDF. + + + + .. py:method:: _create_window_udf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> WindowUDF + :staticmethod: + + + Create a WindowUDF instance from function arguments. + + + + .. py:method:: _create_window_udf_decorator(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]] + :staticmethod: + + + Create a decorator for a WindowUDF. + + + + .. 
py:method:: _get_default_name(func: collections.abc.Callable) -> str + :staticmethod: + + + Get the default name for a function based on its attributes. + + + + .. py:method:: _normalize_input_types(input_types: pyarrow.DataType | list[pyarrow.DataType]) -> list[pyarrow.DataType] + :staticmethod: + + + Convert a single DataType to a list if needed. + + + + .. py:method:: from_pycapsule(func: WindowUDFExportable) -> WindowUDF + :staticmethod: + + + Create a Window UDF from WindowUDF PyCapsule object. + + This function will instantiate a Window UDF that uses a DataFusion + WindowUDF that is exported via the FFI bindings. + + + + .. py:method:: udwf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, WindowUDF] + udwf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> WindowUDF + :staticmethod: + + + Create a new User-Defined Window Function (UDWF). + + This method can be used as either a function or a decorator. + + Usage: + - As a function: ``udwf(func, input_types, return_type, volatility, name)``. + - As a decorator: ``@udwf(input_types, return_type, volatility, name)``. + When using ``udwf`` as a decorator, do not pass ``func`` explicitly.
+ + Function example:: + + import pyarrow as pa + + class BiasedNumbers(WindowEvaluator): + def __init__(self, start: int = 0) -> None: + self.start = start + + def evaluate_all(self, values: list[pa.Array], + num_rows: int) -> pa.Array: + return pa.array([self.start + i for i in range(num_rows)]) + + def bias_10() -> BiasedNumbers: + return BiasedNumbers(10) + + udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable") + udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") + udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") + + + Decorator example:: + + @udwf(pa.int64(), pa.int64(), "immutable") + def biased_numbers() -> BiasedNumbers: + return BiasedNumbers(10) + + :param func: Only needed when calling as a function. Skip this argument when + using ``udwf`` as a decorator. If you have a Rust backed WindowUDF + within a PyCapsule, you can pass this parameter and ignore the rest. + They will be determined directly from the underlying function. See + the online documentation for more information. + :param input_types: The data types of the arguments. + :param return_type: The data type of the return value. + :param volatility: See :py:class:`Volatility` for allowed values. + :param name: A descriptive name for the function. + + :returns: A user-defined window function that can be used in window function calls. + + + + .. py:attribute:: _udwf + + +.. py:function:: configure_formatter(**kwargs: Any) -> None + + Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + :param \*\*kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + :raises ValueError: If any invalid parameters are provided + + .. rubric:: Example + + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... 
max_height=500, + ... enable_cell_expansion=True, + ... use_shared_styles=True + ... ) + + +.. py:function:: lit(value: Any) -> expr.Expr + + Create a literal expression. + + +.. py:function:: literal(value: Any) -> expr.Expr + + Create a literal expression. + + +.. py:function:: read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') -> datafusion.dataframe.DataFrame + + Create a :py:class:`DataFrame` for reading an Avro data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the Avro file. + :param schema: The data source schema. + :param file_partition_cols: Partition columns. + :param file_extension: File extension to select. + + :returns: DataFrame representation of the read Avro file + + +.. py:function:: read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = 1000, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) -> datafusion.dataframe.DataFrame + + Read a CSV data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the CSV file + :param schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + :param has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created.
+ :param delimiter: An optional column delimiter. + :param schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + :param options: Set advanced options for CSV reading. This cannot be + combined with any of the other options in this method. + + :returns: DataFrame representation of the read CSV files + + +.. py:function:: read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) -> datafusion.dataframe.DataFrame + + Read a line-delimited JSON data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the JSON file. + :param schema: The data source schema. + :param schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + + :returns: DataFrame representation of the read JSON files. + + +.. 
py:function:: read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: list[list[datafusion.expr.Expr]] | None = None) -> datafusion.dataframe.DataFrame + + Read a Parquet source into a :py:class:`~datafusion.dataframe.DataFrame`. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the Parquet file. + :param table_partition_cols: Partition columns. + :param parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + :param schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + :param file_sort_order: Sort order for the file. + + :returns: DataFrame representation of the read Parquet files + + +.. py:data:: DFSchema + +.. py:data:: col + :type: Col + +.. py:data:: column + :type: Col + +.. py:data:: udaf + +.. py:data:: udf + +.. py:data:: udtf + +.. py:data:: udwf + diff --git a/_sources/autoapi/datafusion/input/base/index.rst.txt b/_sources/autoapi/datafusion/input/base/index.rst.txt new file mode 100644 index 000000000..9b962d05c --- /dev/null +++ b/_sources/autoapi/datafusion/input/base/index.rst.txt @@ -0,0 +1,55 @@ +datafusion.input.base +===================== + +.. py:module:: datafusion.input.base + +.. autoapi-nested-parse:: + + This module provides ``BaseInputSource``.
+ + A user can extend this to provide a custom input source. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.input.base.BaseInputSource + + +Module Contents +--------------- + +.. py:class:: BaseInputSource + + Bases: :py:obj:`abc.ABC` + + + Base Input Source class. + + If a consuming library would like to provide their own InputSource, this is + the class they should extend to write their own. + + Once completed, the Plugin InputSource can be registered with the + SessionContext to ensure that it will be used in order + to obtain the SqlTable information from the custom datasource. + + + .. py:method:: build_table(input_item: Any, table_name: str, **kwarg: Any) -> datafusion.common.SqlTable + :abstractmethod: + + + Create a table from the input source. + + + + .. py:method:: is_correct_input(input_item: Any, table_name: str, **kwargs: Any) -> bool + :abstractmethod: + + + Returns ``True`` if the input is valid. + + + diff --git a/_sources/autoapi/datafusion/input/index.rst.txt b/_sources/autoapi/datafusion/input/index.rst.txt new file mode 100644 index 000000000..9a081edfe --- /dev/null +++ b/_sources/autoapi/datafusion/input/index.rst.txt @@ -0,0 +1,56 @@ +datafusion.input +================ + +.. py:module:: datafusion.input + +.. autoapi-nested-parse:: + + This package provides for input sources. + + The primary class used within DataFusion is ``LocationInputPlugin``. + + + +Submodules +---------- + +.. toctree:: + :maxdepth: 1 + + /autoapi/datafusion/input/base/index + /autoapi/datafusion/input/location/index + + +Classes +------- + +.. autoapisummary:: + + datafusion.input.LocationInputPlugin + + +Package Contents +---------------- + +.. py:class:: LocationInputPlugin + + Bases: :py:obj:`datafusion.input.base.BaseInputSource` + + + Input Plugin for everything. + + This can be read in from a file (on disk, remote etc.). + + + ..
py:method:: build_table(input_item: str, table_name: str, **kwargs: Any) -> datafusion.common.SqlTable + + Create a table from the input source. + + + + .. py:method:: is_correct_input(input_item: Any, table_name: str, **kwargs: Any) -> bool + + Returns `True` if the input is valid. + + + diff --git a/_sources/autoapi/datafusion/input/location/index.rst.txt b/_sources/autoapi/datafusion/input/location/index.rst.txt new file mode 100644 index 000000000..609a280bd --- /dev/null +++ b/_sources/autoapi/datafusion/input/location/index.rst.txt @@ -0,0 +1,44 @@ +datafusion.input.location +========================= + +.. py:module:: datafusion.input.location + +.. autoapi-nested-parse:: + + The default input source for DataFusion. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.input.location.LocationInputPlugin + + +Module Contents +--------------- + +.. py:class:: LocationInputPlugin + + Bases: :py:obj:`datafusion.input.base.BaseInputSource` + + + Input Plugin for everything. + + This can be read in from a file (on disk, remote etc.). + + + .. py:method:: build_table(input_item: str, table_name: str, **kwargs: Any) -> datafusion.common.SqlTable + + Create a table from the input source. + + + + .. py:method:: is_correct_input(input_item: Any, table_name: str, **kwargs: Any) -> bool + + Returns `True` if the input is valid. + + + diff --git a/_sources/autoapi/datafusion/io/index.rst.txt b/_sources/autoapi/datafusion/io/index.rst.txt new file mode 100644 index 000000000..453d6fa04 --- /dev/null +++ b/_sources/autoapi/datafusion/io/index.rst.txt @@ -0,0 +1,113 @@ +datafusion.io +============= + +.. py:module:: datafusion.io + +.. autoapi-nested-parse:: + + IO read functions using global context. + + + +Functions +--------- + +.. autoapisummary:: + + datafusion.io.read_avro + datafusion.io.read_csv + datafusion.io.read_json + datafusion.io.read_parquet + + +Module Contents +--------------- + +.. 
py:function:: read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') -> datafusion.dataframe.DataFrame + + Create a :py:class:`DataFrame` for reading an Avro data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the Avro file. + :param schema: The data source schema. + :param file_partition_cols: Partition columns. + :param file_extension: File extension to select. + + :returns: DataFrame representation of the read Avro file + + +.. py:function:: read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = 1000, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) -> datafusion.dataframe.DataFrame + + Read a CSV data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the CSV file + :param schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + :param has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created. + :param delimiter: An optional column delimiter. + :param schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input.
+ :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + :param options: Set advanced options for CSV reading. This cannot be + combined with any of the other options in this method. + + :returns: DataFrame representation of the read CSV files + + +.. py:function:: read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) -> datafusion.dataframe.DataFrame + + Read a line-delimited JSON data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the JSON file. + :param schema: The data source schema. + :param schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns. + :param file_compression_type: File compression type. + + :returns: DataFrame representation of the read JSON files. + + +.. py:function:: read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: list[list[datafusion.expr.Expr]] | None = None) -> datafusion.dataframe.DataFrame + + Read a Parquet source into a :py:class:`~datafusion.dataframe.DataFrame`. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + :param path: Path to the Parquet file.
+ :param table_partition_cols: Partition columns. + :param parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + :param schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + :param file_sort_order: Sort order for the file. + + :returns: DataFrame representation of the read Parquet files + + diff --git a/_sources/autoapi/datafusion/object_store/index.rst.txt b/_sources/autoapi/datafusion/object_store/index.rst.txt new file mode 100644 index 000000000..d38e86792 --- /dev/null +++ b/_sources/autoapi/datafusion/object_store/index.rst.txt @@ -0,0 +1,36 @@ +datafusion.object_store +======================= + +.. py:module:: datafusion.object_store + +.. autoapi-nested-parse:: + + Object store functionality. + + + +Attributes +---------- + +.. autoapisummary:: + + datafusion.object_store.AmazonS3 + datafusion.object_store.GoogleCloud + datafusion.object_store.Http + datafusion.object_store.LocalFileSystem + datafusion.object_store.MicrosoftAzure + + +Module Contents +--------------- + +.. py:data:: AmazonS3 + +.. py:data:: GoogleCloud + +.. py:data:: Http + +.. py:data:: LocalFileSystem + +.. py:data:: MicrosoftAzure + diff --git a/_sources/autoapi/datafusion/options/index.rst.txt b/_sources/autoapi/datafusion/options/index.rst.txt new file mode 100644 index 000000000..25a6d6464 --- /dev/null +++ b/_sources/autoapi/datafusion/options/index.rst.txt @@ -0,0 +1,242 @@ +datafusion.options +================== + +.. py:module:: datafusion.options + +.. autoapi-nested-parse:: + + Options for reading various file formats. + + + +Classes +------- + +.. 
autoapisummary:: + + datafusion.options.CsvReadOptions + + +Module Contents +--------------- + +.. py:class:: CsvReadOptions(*, has_header: bool = True, delimiter: str = ',', quote: str = '"', terminator: str | None = None, escape: str | None = None, comment: str | None = None, newlines_in_values: bool = False, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, pyarrow.DataType]] | None = None, file_compression_type: str = '', file_sort_order: list[list[datafusion.expr.SortExpr]] | None = None, null_regex: str | None = None, truncated_rows: bool = False) + + Options for reading CSV files. + + This class provides a builder pattern for configuring CSV reading options. + All methods starting with ``with_`` return ``self`` to allow method chaining. + + Initialize CsvReadOptions. + + :param has_header: Does the CSV file have a header row? If schema inference + is run on a file with no headers, default column names are created. + :param delimiter: Column delimiter character. Must be a single ASCII character. + :param quote: Quote character for fields containing delimiters or newlines. + Must be a single ASCII character. + :param terminator: Optional line terminator character. If ``None``, uses CRLF. + Must be a single ASCII character. + :param escape: Optional escape character for quotes. Must be a single ASCII + character. + :param comment: If specified, lines beginning with this character are ignored. + Must be a single ASCII character. + :param newlines_in_values: Whether newlines in quoted values are supported. + Parsing newlines in quoted values may be affected by execution + behavior such as parallel file scanning. Setting this to ``True`` + ensures that newlines in values are parsed successfully, which may + reduce performance. + :param schema: Optional PyArrow schema representing the CSV files. 
If ``None``, + the CSV reader will try to infer it based on data in the file. + :param schema_infer_max_records: Maximum number of rows to read from CSV files + for schema inference if needed. + :param file_extension: File extension; only files with this extension are + selected for data input. + :param table_partition_cols: Partition columns as a list of tuples of + (column_name, data_type). + :param file_compression_type: File compression type. Supported values are + ``"gzip"``, ``"bz2"``, ``"xz"``, ``"zstd"``, or empty string for + uncompressed. + :param file_sort_order: Optional sort order of the files as a list of sort + expressions per file. + :param null_regex: Optional regex pattern to match null values in the CSV. + :param truncated_rows: Whether to allow truncated rows when parsing. By default + this is ``False`` and will error if the CSV rows have different + lengths. When set to ``True``, it will allow records with less than + the expected number of columns and fill the missing columns with + nulls. If the record's schema is not nullable, it will still return + an error. + + + .. py:method:: to_inner() -> datafusion._internal.options.CsvReadOptions + + Convert this object into the underlying Rust structure. + + This is intended for internal use only. + + + + .. py:method:: with_comment(comment: str | None) -> CsvReadOptions + + Configure the comment character. + + + + .. py:method:: with_delimiter(delimiter: str) -> CsvReadOptions + + Configure the column delimiter. + + + + .. py:method:: with_escape(escape: str | None) -> CsvReadOptions + + Configure the escape character. + + + + .. py:method:: with_file_compression_type(file_compression_type: str) -> CsvReadOptions + + Configure file compression type. + + + + .. py:method:: with_file_extension(file_extension: str) -> CsvReadOptions + + Configure the file extension filter. + + + + .. 
py:method:: with_file_sort_order(file_sort_order: list[list[datafusion.expr.SortExpr]]) -> CsvReadOptions + + Configure file sort order. + + + + .. py:method:: with_has_header(has_header: bool) -> CsvReadOptions + + Configure whether the CSV has a header row. + + + + .. py:method:: with_newlines_in_values(newlines_in_values: bool) -> CsvReadOptions + + Configure whether newlines in values are supported. + + + + .. py:method:: with_null_regex(null_regex: str | None) -> CsvReadOptions + + Configure null value regex pattern. + + + + .. py:method:: with_quote(quote: str) -> CsvReadOptions + + Configure the quote character. + + + + .. py:method:: with_schema(schema: pyarrow.Schema | None) -> CsvReadOptions + + Configure the schema. + + + + .. py:method:: with_schema_infer_max_records(schema_infer_max_records: int) -> CsvReadOptions + + Configure maximum records for schema inference. + + + + .. py:method:: with_table_partition_cols(table_partition_cols: list[tuple[str, pyarrow.DataType]]) -> CsvReadOptions + + Configure table partition columns. + + + + .. py:method:: with_terminator(terminator: str | None) -> CsvReadOptions + + Configure the line terminator character. + + + + .. py:method:: with_truncated_rows(truncated_rows: bool) -> CsvReadOptions + + Configure whether to allow truncated rows. + + + + .. py:attribute:: comment + :value: None + + + + .. py:attribute:: delimiter + :value: ',' + + + + .. py:attribute:: escape + :value: None + + + + .. py:attribute:: file_compression_type + :value: '' + + + + .. py:attribute:: file_extension + :value: '.csv' + + + + .. py:attribute:: file_sort_order + :value: [] + + + + .. py:attribute:: has_header + :value: True + + + + .. py:attribute:: newlines_in_values + :value: False + + + + .. py:attribute:: null_regex + :value: None + + + + .. py:attribute:: quote + :value: '"' + + + + .. py:attribute:: schema + :value: None + + + + .. py:attribute:: schema_infer_max_records + :value: 1000 + + + + .. 
py:attribute:: table_partition_cols + :value: [] + + + + .. py:attribute:: terminator + :value: None + + + + .. py:attribute:: truncated_rows + :value: False + + + diff --git a/_sources/autoapi/datafusion/plan/index.rst.txt b/_sources/autoapi/datafusion/plan/index.rst.txt new file mode 100644 index 000000000..93af06184 --- /dev/null +++ b/_sources/autoapi/datafusion/plan/index.rst.txt @@ -0,0 +1,175 @@ +datafusion.plan +=============== + +.. py:module:: datafusion.plan + +.. autoapi-nested-parse:: + + This module supports physical and logical plans in DataFusion. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.plan.ExecutionPlan + datafusion.plan.LogicalPlan + + +Module Contents +--------------- + +.. py:class:: ExecutionPlan(plan: datafusion._internal.ExecutionPlan) + + Represent nodes in the DataFusion Physical Plan. + + This constructor should not be called by the end user. + + + .. py:method:: __repr__() -> str + + Print a string representation of the physical plan. + + + + .. py:method:: children() -> list[ExecutionPlan] + + Get a list of children `ExecutionPlan` that act as inputs to this plan. + + The returned list will be empty for leaf nodes such as scans, will contain a + single value for unary nodes, or two values for binary nodes (such as joins). + + + + .. py:method:: display() -> str + + Print the physical plan. + + + + .. py:method:: display_indent() -> str + + Print an indented form of the physical plan. + + + + .. py:method:: from_proto(ctx: datafusion.context.SessionContext, data: bytes) -> ExecutionPlan + :staticmethod: + + + Create an ExecutionPlan from protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:method:: to_proto() -> bytes + + Convert an ExecutionPlan into protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:attribute:: _raw_plan + + + .. 
py:property:: partition_count + :type: int + + + Returns the number of partitions in the physical plan. + + +.. py:class:: LogicalPlan(plan: datafusion._internal.LogicalPlan) + + Logical Plan. + + A `LogicalPlan` is a node in a tree of relational operators (such as + Projection or Filter). + + Represents transforming an input relation (table) to an output relation + (table) with a potentially different schema. Plans form a dataflow tree + where data flows from leaves up to the root to produce the query result. + + A `LogicalPlan` can be created by the SQL query planner, the DataFrame API, + or programmatically (for example custom query languages). + + This constructor should not be called by the end user. + + + .. py:method:: __eq__(other: LogicalPlan) -> bool + + Test equality. + + + + .. py:method:: __repr__() -> str + + Generate a printable representation of the plan. + + + + .. py:method:: display() -> str + + Print the logical plan. + + + + .. py:method:: display_graphviz() -> str + + Print the graph visualization of the logical plan. + + Returns a `format`able structure that produces lines meant for graphical display + using the `DOT` language. This format can be visualized using software from + [`graphviz`](https://graphviz.org/) + + + + .. py:method:: display_indent() -> str + + Print an indented form of the logical plan. + + + + .. py:method:: display_indent_schema() -> str + + Print an indented form of the schema for the logical plan. + + + + .. py:method:: from_proto(ctx: datafusion.context.SessionContext, data: bytes) -> LogicalPlan + :staticmethod: + + + Create a LogicalPlan from protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. py:method:: inputs() -> list[LogicalPlan] + + Returns the list of inputs to the logical plan. + + + + .. py:method:: to_proto() -> bytes + + Convert a LogicalPlan to protobuf bytes. + + Tables created in memory from record batches are currently not supported. + + + + .. 
py:method:: to_variant() -> Any + + Convert the logical plan into its specific variant. + + + + .. py:attribute:: _raw_plan + + diff --git a/_sources/autoapi/datafusion/record_batch/index.rst.txt b/_sources/autoapi/datafusion/record_batch/index.rst.txt new file mode 100644 index 000000000..f51a7e667 --- /dev/null +++ b/_sources/autoapi/datafusion/record_batch/index.rst.txt @@ -0,0 +1,106 @@ +datafusion.record_batch +======================= + +.. py:module:: datafusion.record_batch + +.. autoapi-nested-parse:: + + This module provides the classes for handling record batches. + + These are typically the result of dataframe + :py:func:`datafusion.dataframe.DataFrame.execute_stream` operations. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.record_batch.RecordBatch + datafusion.record_batch.RecordBatchStream + + +Module Contents +--------------- + +.. py:class:: RecordBatch(record_batch: datafusion._internal.RecordBatch) + + This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`. + + This constructor is generally not called by the end user. + + See the :py:class:`RecordBatchStream` iterator for generating this class. + + + .. py:method:: __arrow_c_array__(requested_schema: object | None = None) -> tuple[object, object] + + Export the record batch via the Arrow C Data Interface. + + This allows zero-copy interchange with libraries that support the + `Arrow PyCapsule interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_. + + :param requested_schema: Attempt to provide the record batch using this + schema. Only straightforward projections such as column + selection or reordering are applied. + + :returns: Two Arrow PyCapsule objects representing the ``ArrowArray`` and + ``ArrowSchema``. + + + + .. py:method:: to_pyarrow() -> pyarrow.RecordBatch + + Convert to :py:class:`pyarrow.RecordBatch`. + + + + .. py:attribute:: record_batch + + +.. py:class:: RecordBatchStream(record_batch_stream: datafusion._internal.RecordBatchStream) + + This class represents a stream of record batches.
+ + These are typically the result of a + :py:func:`~datafusion.dataframe.DataFrame.execute_stream` operation. + + This constructor is typically not called by the end user. + + + .. py:method:: __aiter__() -> typing_extensions.Self + + Return an asynchronous iterator over record batches. + + + + .. py:method:: __anext__() -> RecordBatch + :async: + + + Return the next :py:class:`RecordBatch` in the stream asynchronously. + + + + .. py:method:: __iter__() -> typing_extensions.Self + + Return an iterator over record batches. + + + + .. py:method:: __next__() -> RecordBatch + + Return the next :py:class:`RecordBatch` in the stream. + + + + .. py:method:: next() -> RecordBatch + + See :py:func:`__next__` for the iterator function. + + + + .. py:attribute:: rbs + + diff --git a/_sources/autoapi/datafusion/substrait/index.rst.txt b/_sources/autoapi/datafusion/substrait/index.rst.txt new file mode 100644 index 000000000..b9a1fd525 --- /dev/null +++ b/_sources/autoapi/datafusion/substrait/index.rst.txt @@ -0,0 +1,174 @@ +datafusion.substrait +==================== + +.. py:module:: datafusion.substrait + +.. autoapi-nested-parse:: + + This module provides support for using substrait with datafusion. + + For additional information about Substrait, see the project website at + https://substrait.io/. + + + +Classes +------- + +.. autoapisummary:: + + datafusion.substrait.Consumer + datafusion.substrait.Plan + datafusion.substrait.Producer + datafusion.substrait.Serde + + +Module Contents +--------------- + +.. py:class:: Consumer + + Generates a logical plan from a substrait plan. + + + .. py:method:: from_substrait_plan(ctx: datafusion.context.SessionContext, plan: Plan) -> datafusion.plan.LogicalPlan + :staticmethod: + + + Convert a Substrait plan to a DataFusion LogicalPlan. + + :param ctx: SessionContext to use. + :param plan: Substrait plan to convert. + + :returns: LogicalPlan. + + + +..
py:class:: Plan(plan: datafusion._internal.substrait.Plan) + + A class representing an encodable substrait plan. + + Create a substrait plan. + + The user should not have to call this constructor directly. Rather, it + should be created via the :py:class:`Serde` or :py:class:`Producer` classes + in this module. + + + .. py:method:: encode() -> bytes + + Encode the plan to bytes. + + :returns: Encoded plan. + + + + .. py:method:: from_json(json: str) -> Plan + :staticmethod: + + + Parse a plan from a JSON string representation. + + :param json: JSON representation of a Substrait plan. + + :returns: Plan object representing the Substrait plan. + + + + .. py:method:: to_json() -> str + + Get the JSON representation of the Substrait plan. + + :returns: A JSON representation of the Substrait plan. + + + + .. py:attribute:: plan_internal + + +.. py:class:: Producer + + Generates substrait plans from a logical plan. + + + .. py:method:: to_substrait_plan(logical_plan: datafusion.plan.LogicalPlan, ctx: datafusion.context.SessionContext) -> Plan + :staticmethod: + + + Convert a DataFusion LogicalPlan to a Substrait plan. + + :param logical_plan: LogicalPlan to convert. + :param ctx: SessionContext to use. + + :returns: Substrait plan. + + + +.. py:class:: Serde + + Provides the ``Substrait`` serialization and deserialization. + + + .. py:method:: deserialize(path: str | pathlib.Path) -> Plan + :staticmethod: + + + Deserialize a Substrait plan from a file. + + :param path: Path to read the Substrait plan from. + + :returns: Substrait plan. + + + + .. py:method:: deserialize_bytes(proto_bytes: bytes) -> Plan + :staticmethod: + + + Deserialize a Substrait plan from bytes. + + :param proto_bytes: Bytes to read the Substrait plan from. + + :returns: Substrait plan. + + + + .. py:method:: serialize(sql: str, ctx: datafusion.context.SessionContext, path: str | pathlib.Path) -> None + :staticmethod: + + + Serialize a SQL query to a Substrait plan and write it to a file.
+ + :param sql: SQL query to serialize. + :param ctx: SessionContext to use. + :param path: Path to write the Substrait plan to. + + + + .. py:method:: serialize_bytes(sql: str, ctx: datafusion.context.SessionContext) -> bytes + :staticmethod: + + + Serialize a SQL query to a Substrait plan as bytes. + + :param sql: SQL query to serialize. + :param ctx: SessionContext to use. + + :returns: Substrait plan as bytes. + + + + .. py:method:: serialize_to_plan(sql: str, ctx: datafusion.context.SessionContext) -> Plan + :staticmethod: + + + Serialize a SQL query to a Substrait plan. + + Args: + sql: SQL query to serialize. + ctx: SessionContext to use. + + :returns: Substrait plan. + + + diff --git a/_sources/autoapi/datafusion/unparser/index.rst.txt b/_sources/autoapi/datafusion/unparser/index.rst.txt new file mode 100644 index 000000000..be2a35240 --- /dev/null +++ b/_sources/autoapi/datafusion/unparser/index.rst.txt @@ -0,0 +1,97 @@ +datafusion.unparser +=================== + +.. py:module:: datafusion.unparser + +.. autoapi-nested-parse:: + + This module provides support for unparsing datafusion plans to SQL. + + For additional information about unparsing, see https://docs.rs/datafusion-sql/latest/datafusion_sql/unparser/index.html + + + +Classes +------- + +.. autoapisummary:: + + datafusion.unparser.Dialect + datafusion.unparser.Unparser + + +Module Contents +--------------- + +.. py:class:: Dialect(dialect: datafusion._internal.unparser.Dialect) + + DataFusion unparser SQL dialect. + + This constructor is not typically called by the end user. + + + .. py:method:: default() -> Dialect + :staticmethod: + + + Create a new default dialect. + + + + .. py:method:: duckdb() -> Dialect + :staticmethod: + + + Create a new DuckDB dialect. + + + + .. py:method:: mysql() -> Dialect + :staticmethod: + + + Create a new MySQL dialect. + + + + .. py:method:: postgres() -> Dialect + :staticmethod: + + + Create a new PostgreSQL dialect. + + + + ..
py:method:: sqlite() -> Dialect + :staticmethod: + + + Create a new SQLite dialect. + + + + .. py:attribute:: dialect + + +.. py:class:: Unparser(dialect: Dialect) + + DataFusion unparser. + + This constructor is not typically called by the end user. + + + .. py:method:: plan_to_sql(plan: datafusion.plan.LogicalPlan) -> str + + Convert a logical plan to a SQL string. + + + + .. py:method:: with_pretty(pretty: bool) -> Unparser + + Set the pretty flag. + + + + .. py:attribute:: unparser + + diff --git a/_sources/autoapi/datafusion/user_defined/index.rst.txt b/_sources/autoapi/datafusion/user_defined/index.rst.txt new file mode 100644 index 000000000..21063829c --- /dev/null +++ b/_sources/autoapi/datafusion/user_defined/index.rst.txt @@ -0,0 +1,788 @@ +datafusion.user_defined +======================= + +.. py:module:: datafusion.user_defined + +.. autoapi-nested-parse:: + + Provides the user-defined functions for evaluation of dataframes. + + + +Attributes +---------- + +.. autoapisummary:: + + datafusion.user_defined._R + datafusion.user_defined.udaf + datafusion.user_defined.udf + datafusion.user_defined.udtf + datafusion.user_defined.udwf + + +Classes +------- + +.. autoapisummary:: + + datafusion.user_defined.Accumulator + datafusion.user_defined.AggregateUDF + datafusion.user_defined.AggregateUDFExportable + datafusion.user_defined.ScalarUDF + datafusion.user_defined.ScalarUDFExportable + datafusion.user_defined.TableFunction + datafusion.user_defined.Volatility + datafusion.user_defined.WindowEvaluator + datafusion.user_defined.WindowUDF + datafusion.user_defined.WindowUDFExportable + + +Functions +--------- + +.. autoapisummary:: + + datafusion.user_defined._is_pycapsule + datafusion.user_defined.data_type_or_field_to_field + datafusion.user_defined.data_types_or_fields_to_field_list + + +Module Contents +--------------- + +.. py:class:: Accumulator + + Defines how an :py:class:`AggregateUDF` accumulates values. + + + .. 
py:method:: evaluate() -> pyarrow.Scalar + :abstractmethod: + + + Return the resultant value. + + While this function template expects a PyArrow Scalar value return type, + you can return any value that can be converted into a Scalar. This + includes basic Python data types such as integers and strings. In + addition to these primitive types, we currently support PyArrow, nanoarrow, + and arro3 objects. Other objects + that support the Arrow FFI standard will be given a "best attempt" at + conversion to scalar objects. + + + + .. py:method:: merge(states: list[pyarrow.Array]) -> None + :abstractmethod: + + + Merge a set of states. + + + + .. py:method:: state() -> list[pyarrow.Scalar] + :abstractmethod: + + + Return the current state. + + While this function template expects a list of PyArrow Scalar values as its return type, + you can return any value that can be converted into a Scalar. This + includes basic Python data types such as integers and strings. In + addition to these primitive types, we currently support PyArrow, nanoarrow, + and arro3 objects. Other objects + that support the Arrow FFI standard will be given a "best attempt" at + conversion to scalar objects. + + + + .. py:method:: update(*values: pyarrow.Array) -> None + :abstractmethod: + + + Evaluate an array of values and update state. + + + +.. py:class:: AggregateUDF(name: str, accumulator: collections.abc.Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str) + AggregateUDF(name: str, accumulator: AggregateUDFExportable, input_types: None = ..., return_type: None = ..., state_type: None = ..., volatility: None = ...) + + Class for performing aggregate user-defined functions (UDAF). + + Aggregate UDFs operate on a group of rows and return a single value. See + also :py:class:`ScalarUDF` for operating on a row by row basis.
+ + Instantiate a user-defined aggregate function (UDAF). + + See :py:func:`udaf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDAF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Aggregate UDF. + + + + .. py:method:: from_pycapsule(func: AggregateUDFExportable | _typeshed.CapsuleType) -> AggregateUDF + :staticmethod: + + + Create an Aggregate UDF from an AggregateUDF PyCapsule object. + + This function will instantiate an Aggregate UDF that uses a DataFusion + AggregateUDF that is exported via the FFI bindings. + + + + .. py:method:: udaf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, AggregateUDF] + udaf(accum: collections.abc.Callable[[], Accumulator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) -> AggregateUDF + udaf(accum: AggregateUDFExportable) -> AggregateUDF + udaf(accum: _typeshed.CapsuleType) -> AggregateUDF + :staticmethod: + + + Create a new User-Defined Aggregate Function (UDAF). + + This class allows you to define an aggregate function that can be used in + data aggregation or window function calls. + + Usage: + - As a function: ``udaf(accum, input_types, return_type, state_type, volatility, name)``. + - As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``. + When using ``udaf`` as a decorator, do not pass ``accum`` explicitly. + + Function example: + + If your :py:class:`Accumulator` can be instantiated with no arguments, you + can simply pass its type as ``accum``.
If you need to pass additional + arguments to its constructor, you can define a lambda or a factory method. + During runtime the :py:class:`Accumulator` will be constructed for every + instance in which this UDAF is used. The following examples are all valid:: + + import pyarrow as pa + import pyarrow.compute as pc + + class Summarize(Accumulator): + def __init__(self, bias: float = 0.0): + self._sum = pa.scalar(bias) + + def state(self) -> list[pa.Scalar]: + return [self._sum] + + def update(self, values: pa.Array) -> None: + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) + + def merge(self, states: list[pa.Array]) -> None: + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) + + def evaluate(self) -> pa.Scalar: + return self._sum + + def sum_bias_10() -> Summarize: + return Summarize(10.0) + + udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), + [pa.float64()], "immutable") + + Decorator example:: + + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def udf4() -> Summarize: + return Summarize(10.0) + + :param accum: The accumulator python function. Only needed when calling as a + function. Skip this argument when using ``udaf`` as a decorator. + If you have a Rust backed AggregateUDF within a PyCapsule, you can + pass this parameter and ignore the rest. They will be determined + directly from the underlying function. See the online documentation + for more information. + :param input_types: The data types of the arguments to ``accum``. + :param return_type: The data type of the return value. + :param state_type: The data types of the intermediate accumulation. + :param volatility: See :py:class:`Volatility` for allowed values. + :param name: A descriptive name for the function.
+ + :returns: A user-defined aggregate function, which can be used in either data + aggregation or window function calls. + + + + .. py:attribute:: _udaf + + +.. py:class:: AggregateUDFExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object that has __datafusion_aggregate_udf__ PyCapsule. + + + .. py:method:: __datafusion_aggregate_udf__() -> object + + +.. py:class:: ScalarUDF(name: str, func: collections.abc.Callable[Ellipsis, _R], input_fields: list[pyarrow.Field], return_field: _R, volatility: Volatility | str) + + Class for performing scalar user-defined functions (UDF). + + Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for + operating on a group of rows. + + Instantiate a scalar user-defined function (UDF). + + See helper method :py:func:`udf` for argument details. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Scalar UDF. + + + + .. py:method:: from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF + :staticmethod: + + + Create a Scalar UDF from ScalarUDF PyCapsule object. + + This function will instantiate a Scalar UDF that uses a DataFusion + ScalarUDF that is exported via the FFI bindings. + + + + .. 
py:method:: udf(input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, ScalarUDF] + udf(func: collections.abc.Callable[Ellipsis, _R], input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) -> ScalarUDF + udf(func: ScalarUDFExportable) -> ScalarUDF + :staticmethod: + + + Create a new User-Defined Function (UDF). + + This can be used as either a function or a decorator. + + Usage: + - As a function: ``udf(func, input_fields, return_field, volatility, name)``. + - As a decorator: ``@udf(input_fields, return_field, volatility, name)``. + When used as a decorator, do **not** pass ``func`` explicitly. + + In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. + When you do so, it will be assumed that the nullability of the inputs and + output are True and that they have no metadata. + + :param func: Only needed when calling as a function. + Skip this argument when using ``udf`` as a decorator. If you have a Rust + backed ScalarUDF within a PyCapsule, you can pass this parameter + and ignore the rest. They will be determined directly from the + underlying function. See the online documentation for more information. + :type func: Callable, optional + :param input_fields: The data types or Fields + of the arguments to ``func``. This list must be of the same length + as the number of arguments. + :type input_fields: list[pa.Field | pa.DataType] + :param return_field: The field of the return value from the function. + :type return_field: _R + :param volatility: See :py:class:`Volatility` for allowed values. + :type volatility: Volatility | str + :param name: A descriptive name for the function.
+ :type name: Optional[str] + + :returns: A user-defined function that can be used in SQL expressions, + data aggregation, or window function calls. + + Example: Using ``udf`` as a function:: + + def double_func(x): + return x * 2 + double_udf = udf(double_func, [pa.int32()], pa.int32(), + "volatile", "double_it") + + Example: Using ``udf`` as a decorator:: + + @udf([pa.int32()], pa.int32(), "volatile", "double_it") + def double_udf(x): + return x * 2 + + + + .. py:attribute:: _udf + + +.. py:class:: ScalarUDFExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object that has __datafusion_scalar_udf__ PyCapsule. + + + .. py:method:: __datafusion_scalar_udf__() -> object + + +.. py:class:: TableFunction(name: str, func: collections.abc.Callable[[], any], ctx: datafusion.SessionContext | None = None) + + Class for performing user-defined table functions (UDTF). + + Table functions generate new table providers based on the + input expressions. + + Instantiate a user-defined table function (UDTF). + + See :py:func:`udtf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> Any + + Execute the UDTF and return a table provider. + + + + .. py:method:: __repr__() -> str + + User printable representation. + + + + .. py:method:: _create_table_udf(func: collections.abc.Callable[Ellipsis, Any], name: str) -> TableFunction + :staticmethod: + + + Create a TableFunction instance from function arguments. + + + + .. py:method:: _create_table_udf_decorator(name: str | None = None) -> collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]] + :staticmethod: + + + Create a decorator for a TableFunction. + + + + .. py:method:: udtf(name: str) -> collections.abc.Callable[Ellipsis, Any] + udtf(func: collections.abc.Callable[[], Any], name: str) -> TableFunction + :staticmethod: + + + Create a new User-Defined Table Function (UDTF). + + + + ..
py:attribute:: _udtf + + +.. py:class:: Volatility + + Bases: :py:obj:`enum.Enum` + + + Defines how stable or volatile a function is. + + When setting the volatility of a function, you can either pass this + enumeration or a ``str``. The ``str`` equivalent is the lower case value of the + name (``"immutable"``, ``"stable"``, or ``"volatile"``). + + + .. py:method:: __str__() -> str + + Returns the string equivalent. + + + + .. py:attribute:: Immutable + :value: 1 + + + An immutable function will always return the same output when given the + same input. + + DataFusion will attempt to inline immutable functions during planning. + + + .. py:attribute:: Stable + :value: 2 + + + Returns the same value for a given input within a single query. + + A stable function may return different values given the same input across + different queries but must return the same value for a given input within a + query. An example of this is the ``Now`` function. DataFusion will attempt to + inline ``Stable`` functions during planning, when possible. For the query + ``select col1, now() from t1``, execution might take a while, but the ``now()`` + column will be the same for each output row because it is evaluated during + planning. + + + .. py:attribute:: Volatile + :value: 3 + + + A volatile function may change the return value from evaluation to + evaluation. + + Multiple invocations of a volatile function may return different results + when used in the same query. An example of this is the ``random()`` function. + DataFusion cannot evaluate such functions during planning. In the query + ``select col1, random() from t1``, the ``random()`` function will be evaluated + for each output row, resulting in a unique random value for each row. + + +.. py:class:: WindowEvaluator + + Evaluator class for user-defined window functions (UDWF). + + It is up to the user to decide which evaluate function is appropriate.
+ + +------------------------+--------------------------------+------------------+---------------------------+ + | ``uses_window_frame`` | ``supports_bounded_execution`` | ``include_rank`` | function_to_implement | + +========================+================================+==================+===========================+ + | False (default) | False (default) | False (default) | ``evaluate_all`` | + +------------------------+--------------------------------+------------------+---------------------------+ + | False | True | False | ``evaluate`` | + +------------------------+--------------------------------+------------------+---------------------------+ + | False | True/False | True | ``evaluate_all_with_rank``| + +------------------------+--------------------------------+------------------+---------------------------+ + | True | True/False | True/False | ``evaluate`` | + +------------------------+--------------------------------+------------------+---------------------------+ + + + .. py:method:: evaluate(values: list[pyarrow.Array], eval_range: tuple[int, int]) -> pyarrow.Scalar + + Evaluate window function on a range of rows in an input partition. + + This is the simplest and most general function to implement + but also the least performant as it creates output one row at + a time. It is typically much faster to implement stateful + evaluation using one of the other specialized methods on this + trait. + + Returns a [`ScalarValue`] that is the value of the window + function within `range` for the entire partition. Argument + `values` contains the evaluation result of function arguments + and evaluation results of ORDER BY expressions. If function has a + single argument, `values[1..]` will contain ORDER BY expression results. + + + + .. py:method:: evaluate_all(values: list[pyarrow.Array], num_rows: int) -> pyarrow.Array + + Evaluate a window function on an entire input partition. 
+ + This function is called once per input *partition* for window functions that + *do not use* values from the window frame, such as + :py:func:`~datafusion.functions.row_number`, + :py:func:`~datafusion.functions.rank`, + :py:func:`~datafusion.functions.dense_rank`, + :py:func:`~datafusion.functions.percent_rank`, + :py:func:`~datafusion.functions.cume_dist`, + :py:func:`~datafusion.functions.lead`, + and :py:func:`~datafusion.functions.lag`. + + It produces the result of all rows in a single pass. It + expects to receive the entire partition as the ``value`` and + must produce an output column with one output row for every + input row. + + ``num_rows`` is required to correctly compute the output in case + ``len(values) == 0`` + + Implementing this function is an optimization. Certain window + functions are not affected by the window frame definition or + the query doesn't have a frame, and ``evaluate`` skips the + (costly) window frame boundary calculation and the overhead of + calling ``evaluate`` for each output row. + + For example, the `LAG` built in window function does not use + the values of its window frame (it can be computed in one shot + on the entire partition with ``Self::evaluate_all`` regardless of the + window defined in the ``OVER`` clause) + + .. code-block:: text + + lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) + + However, ``avg()`` computes the average in the window and thus + does use its window frame. + + .. code-block:: text + + avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) + + + + .. py:method:: evaluate_all_with_rank(num_rows: int, ranks_in_partition: list[tuple[int, int]]) -> pyarrow.Array + + Called for window functions that only need the rank of a row. + + Evaluate the partition evaluator against the partition using + the row ranks. For example, ``rank(col("a"))`` produces + + .. 
code-block:: text + + a | rank + - + ---- + A | 1 + A | 1 + C | 3 + D | 4 + D | 4 + + For this case, `num_rows` would be `5` and the + `ranks_in_partition` would be called with + + .. code-block:: text + + [ + (0,1), + (2,2), + (3,4), + ] + + The user must implement this method if ``include_rank`` returns True. + + + + .. py:method:: get_range(idx: int, num_rows: int) -> tuple[int, int] + + Return the range for the window function. + + If the `uses_window_frame` flag is `false`, this method is used to + calculate the required range for the window function during + stateful execution. + + Generally there is no required range, hence by default this + returns the smallest range (the current row), e.g. seeing the current row is + enough to calculate the window result (such as row_number, rank, + etc.). + + :param idx: Current index. + :param num_rows: Number of rows. + + + + .. py:method:: include_rank() -> bool + + Can this function be evaluated with (only) rank? + + + + .. py:method:: is_causal() -> bool + + Get whether evaluator needs future data for its result. + + + + .. py:method:: memoize() -> None + + Perform a memoize operation to improve performance. + + When the window frame has a fixed beginning (e.g. UNBOUNDED + PRECEDING), some functions such as FIRST_VALUE and + NTH_VALUE do not need the (unbounded) input once they have + seen a certain amount of input. + + `memoize` is called after each input batch is processed, and + such functions can save whatever they need. + + + + .. py:method:: supports_bounded_execution() -> bool + + Can the window function be incrementally computed using bounded memory? + + + + .. py:method:: uses_window_frame() -> bool + + Does the window function use the values from the window frame? + + + +.. py:class:: WindowUDF(name: str, func: collections.abc.Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str) + + Class for performing window user-defined functions (UDF). 
+ + Window UDFs operate on a partition of rows. See + also :py:class:`ScalarUDF` for operating on a row by row basis. + + Instantiate a user-defined window function (UDWF). + + See :py:func:`udwf` for a convenience function and argument + descriptions. + + + .. py:method:: __call__(*args: datafusion.expr.Expr) -> datafusion.expr.Expr + + Execute the UDWF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + + + + .. py:method:: __repr__() -> str + + Print a string representation of the Window UDF. + + + + .. py:method:: _create_window_udf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> WindowUDF + :staticmethod: + + + Create a WindowUDF instance from function arguments. + + + + .. py:method:: _create_window_udf_decorator(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]] + :staticmethod: + + + Create a decorator for a WindowUDF. + + + + .. py:method:: _get_default_name(func: collections.abc.Callable) -> str + :staticmethod: + + + Get the default name for a function based on its attributes. + + + + .. py:method:: _normalize_input_types(input_types: pyarrow.DataType | list[pyarrow.DataType]) -> list[pyarrow.DataType] + :staticmethod: + + + Convert a single DataType to a list if needed. + + + + .. py:method:: from_pycapsule(func: WindowUDFExportable) -> WindowUDF + :staticmethod: + + + Create a Window UDF from WindowUDF PyCapsule object. + + This function will instantiate a Window UDF that uses a DataFusion + WindowUDF that is exported via the FFI bindings. + + + + .. 
py:method:: udwf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> collections.abc.Callable[Ellipsis, WindowUDF] + udwf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) -> WindowUDF + :staticmethod: + + + Create a new User-Defined Window Function (UDWF). + + This function can be used either as a function or as a decorator. + + Usage: + - As a function: ``udwf(func, input_types, return_type, volatility, name)``. + - As a decorator: ``@udwf(input_types, return_type, volatility, name)``. + When using ``udwf`` as a decorator, do not pass ``func`` explicitly. + + Function example:: + + import pyarrow as pa + + class BiasedNumbers(WindowEvaluator): + def __init__(self, start: int = 0) -> None: + self.start = start + + def evaluate_all(self, values: list[pa.Array], + num_rows: int) -> pa.Array: + return pa.array([self.start + i for i in range(num_rows)]) + + def bias_10() -> BiasedNumbers: + return BiasedNumbers(10) + + udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable") + udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") + udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") + + + Decorator example:: + + @udwf(pa.int64(), pa.int64(), "immutable") + def biased_numbers() -> BiasedNumbers: + return BiasedNumbers(10) + + :param func: Only needed when calling as a function. Skip this argument when + using ``udwf`` as a decorator. If you have a Rust backed WindowUDF + within a PyCapsule, you can pass this parameter and ignore the rest. + They will be determined directly from the underlying function. See + the online documentation for more information. + :param input_types: The data types of the arguments. + :param return_type: The data type of the return value. 
+ :param volatility: See :py:class:`Volatility` for allowed values. + :param name: A descriptive name for the function. + + :returns: A user-defined window function that can be used in window function calls. + + + + .. py:attribute:: _udwf + + +.. py:class:: WindowUDFExportable + + Bases: :py:obj:`Protocol` + + + Type hint for object that has __datafusion_window_udf__ PyCapsule. + + + .. py:method:: __datafusion_window_udf__() -> object + + +.. py:function:: _is_pycapsule(value: object) -> TypeGuard[_typeshed.CapsuleType] + + Return ``True`` when ``value`` is a CPython ``PyCapsule``. + + +.. py:function:: data_type_or_field_to_field(value: pyarrow.DataType | pyarrow.Field, name: str) -> pyarrow.Field + + Helper function to return a Field from either a Field or DataType. + + +.. py:function:: data_types_or_fields_to_field_list(inputs: collections.abc.Sequence[pyarrow.Field | pyarrow.DataType] | pyarrow.Field | pyarrow.DataType) -> list[pyarrow.Field] + + Helper function to return a list of Fields. + + +.. py:data:: _R + +.. py:data:: udaf + +.. py:data:: udf + +.. py:data:: udtf + +.. py:data:: udwf + diff --git a/_sources/autoapi/index.rst.txt b/_sources/autoapi/index.rst.txt new file mode 100644 index 000000000..5c5423444 --- /dev/null +++ b/_sources/autoapi/index.rst.txt @@ -0,0 +1,11 @@ +API Reference +============= + +This page contains auto-generated API reference documentation [#f1]_. + +.. toctree:: + :titlesonly: + + /autoapi/datafusion/index + +.. [#f1] Created with `sphinx-autoapi `_ \ No newline at end of file diff --git a/docs/source/contributor-guide/ffi.rst b/_sources/contributor-guide/ffi.rst.txt similarity index 96% rename from docs/source/contributor-guide/ffi.rst rename to _sources/contributor-guide/ffi.rst.txt index e0158e0a2..5006b0ca4 100644 --- a/docs/source/contributor-guide/ffi.rst +++ b/_sources/contributor-guide/ffi.rst.txt @@ -156,7 +156,7 @@ instead of mutating the container directly: .. 
code-block:: rust - #[pyclass(from_py_object, name = "Config", module = "datafusion", subclass, frozen)] + #[pyclass(name = "Config", module = "datafusion", subclass, frozen)] #[derive(Clone)] pub(crate) struct PyConfig { config: Arc>, @@ -170,7 +170,7 @@ existing instance in place: .. code-block:: rust - #[pyclass(from_py_object, frozen, name = "SessionContext", module = "datafusion", subclass)] + #[pyclass(frozen, name = "SessionContext", module = "datafusion", subclass)] #[derive(Clone)] pub struct PySessionContext { pub ctx: SessionContext, @@ -186,7 +186,7 @@ field updates: // TODO: This looks like this needs pyo3 tracking so leaving unfrozen for now #[derive(Debug, Clone)] - #[pyclass(from_py_object, name = "DataTypeMap", module = "datafusion.common", subclass)] + #[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)] pub struct DataTypeMap { #[pyo3(get, set)] pub arrow_type: PyDataType, @@ -232,11 +232,8 @@ can then be turned into a ``ForeignTableProvider`` the associated code is: .. code-block:: rust - let capsule = capsule.cast::()?; - let data: NonNull = capsule - .pointer_checked(Some(name))? 
- .cast(); - let codec = unsafe { data.as_ref() }; + let capsule = capsule.downcast::()?; + let provider = unsafe { capsule.reference::() }; By convention the ``datafusion-python`` library expects a Python object that has a ``TableProvider`` PyCapsule to have this capsule accessible by calling a function named diff --git a/docs/source/contributor-guide/introduction.rst b/_sources/contributor-guide/introduction.rst.txt similarity index 100% rename from docs/source/contributor-guide/introduction.rst rename to _sources/contributor-guide/introduction.rst.txt diff --git a/docs/source/index.rst b/_sources/index.rst.txt similarity index 100% rename from docs/source/index.rst rename to _sources/index.rst.txt diff --git a/docs/source/user-guide/basics.rst b/_sources/user-guide/basics.rst.txt similarity index 100% rename from docs/source/user-guide/basics.rst rename to _sources/user-guide/basics.rst.txt diff --git a/docs/source/user-guide/common-operations/aggregations.rst b/_sources/user-guide/common-operations/aggregations.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/aggregations.rst rename to _sources/user-guide/common-operations/aggregations.rst.txt diff --git a/docs/source/user-guide/common-operations/basic-info.rst b/_sources/user-guide/common-operations/basic-info.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/basic-info.rst rename to _sources/user-guide/common-operations/basic-info.rst.txt diff --git a/docs/source/user-guide/common-operations/expressions.rst b/_sources/user-guide/common-operations/expressions.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/expressions.rst rename to _sources/user-guide/common-operations/expressions.rst.txt diff --git a/docs/source/user-guide/common-operations/functions.rst b/_sources/user-guide/common-operations/functions.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/functions.rst rename to 
_sources/user-guide/common-operations/functions.rst.txt diff --git a/docs/source/user-guide/common-operations/index.rst b/_sources/user-guide/common-operations/index.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/index.rst rename to _sources/user-guide/common-operations/index.rst.txt diff --git a/docs/source/user-guide/common-operations/joins.rst b/_sources/user-guide/common-operations/joins.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/joins.rst rename to _sources/user-guide/common-operations/joins.rst.txt diff --git a/docs/source/user-guide/common-operations/select-and-filter.rst b/_sources/user-guide/common-operations/select-and-filter.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/select-and-filter.rst rename to _sources/user-guide/common-operations/select-and-filter.rst.txt diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/_sources/user-guide/common-operations/udf-and-udfa.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/udf-and-udfa.rst rename to _sources/user-guide/common-operations/udf-and-udfa.rst.txt diff --git a/docs/source/user-guide/common-operations/views.rst b/_sources/user-guide/common-operations/views.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/views.rst rename to _sources/user-guide/common-operations/views.rst.txt diff --git a/docs/source/user-guide/common-operations/windows.rst b/_sources/user-guide/common-operations/windows.rst.txt similarity index 100% rename from docs/source/user-guide/common-operations/windows.rst rename to _sources/user-guide/common-operations/windows.rst.txt diff --git a/docs/source/user-guide/configuration.rst b/_sources/user-guide/configuration.rst.txt similarity index 100% rename from docs/source/user-guide/configuration.rst rename to _sources/user-guide/configuration.rst.txt diff --git 
a/docs/source/user-guide/data-sources.rst b/_sources/user-guide/data-sources.rst.txt similarity index 100% rename from docs/source/user-guide/data-sources.rst rename to _sources/user-guide/data-sources.rst.txt diff --git a/docs/source/user-guide/dataframe/index.rst b/_sources/user-guide/dataframe/index.rst.txt similarity index 100% rename from docs/source/user-guide/dataframe/index.rst rename to _sources/user-guide/dataframe/index.rst.txt diff --git a/docs/source/user-guide/dataframe/rendering.rst b/_sources/user-guide/dataframe/rendering.rst.txt similarity index 100% rename from docs/source/user-guide/dataframe/rendering.rst rename to _sources/user-guide/dataframe/rendering.rst.txt diff --git a/docs/source/user-guide/introduction.rst b/_sources/user-guide/introduction.rst.txt similarity index 100% rename from docs/source/user-guide/introduction.rst rename to _sources/user-guide/introduction.rst.txt diff --git a/docs/source/user-guide/io/arrow.rst b/_sources/user-guide/io/arrow.rst.txt similarity index 100% rename from docs/source/user-guide/io/arrow.rst rename to _sources/user-guide/io/arrow.rst.txt diff --git a/docs/source/user-guide/io/avro.rst b/_sources/user-guide/io/avro.rst.txt similarity index 100% rename from docs/source/user-guide/io/avro.rst rename to _sources/user-guide/io/avro.rst.txt diff --git a/docs/source/user-guide/io/csv.rst b/_sources/user-guide/io/csv.rst.txt similarity index 100% rename from docs/source/user-guide/io/csv.rst rename to _sources/user-guide/io/csv.rst.txt diff --git a/docs/source/user-guide/io/index.rst b/_sources/user-guide/io/index.rst.txt similarity index 100% rename from docs/source/user-guide/io/index.rst rename to _sources/user-guide/io/index.rst.txt diff --git a/docs/source/user-guide/io/json.rst b/_sources/user-guide/io/json.rst.txt similarity index 100% rename from docs/source/user-guide/io/json.rst rename to _sources/user-guide/io/json.rst.txt diff --git a/docs/source/user-guide/io/parquet.rst 
b/_sources/user-guide/io/parquet.rst.txt similarity index 100% rename from docs/source/user-guide/io/parquet.rst rename to _sources/user-guide/io/parquet.rst.txt diff --git a/docs/source/user-guide/io/table_provider.rst b/_sources/user-guide/io/table_provider.rst.txt similarity index 100% rename from docs/source/user-guide/io/table_provider.rst rename to _sources/user-guide/io/table_provider.rst.txt diff --git a/docs/source/user-guide/sql.rst b/_sources/user-guide/sql.rst.txt similarity index 100% rename from docs/source/user-guide/sql.rst rename to _sources/user-guide/sql.rst.txt diff --git a/docs/source/user-guide/upgrade-guides.rst b/_sources/user-guide/upgrade-guides.rst.txt similarity index 87% rename from docs/source/user-guide/upgrade-guides.rst rename to _sources/user-guide/upgrade-guides.rst.txt index e3d7c2d87..a77f60776 100644 --- a/docs/source/user-guide/upgrade-guides.rst +++ b/_sources/user-guide/upgrade-guides.rst.txt @@ -18,27 +18,6 @@ Upgrade Guides ============== -DataFusion 53.0.0 ------------------ - -This version includes an upgraded version of ``pyo3``, which changed the way to extract an FFI -object. Example: - -Before: - -.. code-block:: rust - - let codec = unsafe { capsule.reference::() }; - -Now: - -.. code-block:: rust - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_logical_extension_codec")))? - .cast(); - let codec = unsafe { data.as_ref() }; - DataFusion 52.0.0 ----------------- diff --git a/docs/source/_static/images/2x_bgwhite_original.png b/_static/2x_bgwhite_original.png similarity index 100% rename from docs/source/_static/images/2x_bgwhite_original.png rename to _static/2x_bgwhite_original.png diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 000000000..2a9e4114a --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,914 @@ +/* + * Sphinx stylesheet -- basic theme. 
+ */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 270px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin-top: 10px; +} + +ul.search li { + padding: 5px 0; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} 
+ +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > 
a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 
25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} 
+ +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + 
+aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: 
dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + 
+.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 000000000..0398ebb9f --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,149 @@ +/* + * Base JavaScript utilities for all Sphinx HTML documentation. + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 
0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only 
install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 000000000..f38abe9a8 --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: true, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 000000000..a858a410e Binary files /dev/null and b/_static/file.png differ 
diff --git a/_static/graphviz.css b/_static/graphviz.css new file mode 100644 index 000000000..30f3837b6 --- /dev/null +++ b/_static/graphviz.css @@ -0,0 +1,12 @@ +/* + * Sphinx stylesheet -- graphviz extension. + */ + +img.graphviz { + border: 0; + max-width: 100%; +} + +object.graphviz { + max-width: 100%; +} diff --git a/_static/images/2x_bgwhite_original.png b/_static/images/2x_bgwhite_original.png new file mode 100644 index 000000000..abb5fca6e Binary files /dev/null and b/_static/images/2x_bgwhite_original.png differ diff --git a/docs/source/_static/images/original.png b/_static/images/original.png similarity index 100% rename from docs/source/_static/images/original.png rename to _static/images/original.png diff --git a/docs/source/_static/images/original.svg b/_static/images/original.svg similarity index 100% rename from docs/source/_static/images/original.svg rename to _static/images/original.svg diff --git a/docs/source/_static/images/original2x.png b/_static/images/original2x.png similarity index 100% rename from docs/source/_static/images/original2x.png rename to _static/images/original2x.png diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 000000000..c7fe6c6fa --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,192 @@ +/* + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. 
+ */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if 
(re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 000000000..d96755fda Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 000000000..7107cec93 Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 000000000..041d38c7c --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,84 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #8F5902; font-style: italic } /* Comment */ +.highlight .err { color: #A40000; border: 1px solid #EF2929 } /* Error */ +.highlight .g { color: #000 } /* Generic */ +.highlight .k { color: #204A87; font-weight: bold } /* Keyword */ +.highlight .l { color: #000 } /* Literal */ +.highlight 
.n { color: #000 } /* Name */ +.highlight .o { color: #CE5C00; font-weight: bold } /* Operator */ +.highlight .x { color: #000 } /* Other */ +.highlight .p { color: #000; font-weight: bold } /* Punctuation */ +.highlight .ch { color: #8F5902; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #8F5902; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #8F5902; font-style: italic } /* Comment.Preproc */ +.highlight .cpf { color: #8F5902; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #8F5902; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #8F5902; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A40000 } /* Generic.Deleted */ +.highlight .ge { color: #000; font-style: italic } /* Generic.Emph */ +.highlight .ges { color: #000; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #EF2929 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #000; font-style: italic } /* Generic.Output */ +.highlight .gp { color: #8F5902 } /* Generic.Prompt */ +.highlight .gs { color: #000; font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #A40000; font-weight: bold } /* Generic.Traceback */ +.highlight .kc { color: #204A87; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #204A87; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #204A87; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #204A87; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #204A87; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #204A87; font-weight: bold } /* Keyword.Type */ +.highlight .ld { color: #000 } /* Literal.Date */ +.highlight .m { color: #0000CF; 
font-weight: bold } /* Literal.Number */ +.highlight .s { color: #4E9A06 } /* Literal.String */ +.highlight .na { color: #C4A000 } /* Name.Attribute */ +.highlight .nb { color: #204A87 } /* Name.Builtin */ +.highlight .nc { color: #000 } /* Name.Class */ +.highlight .no { color: #000 } /* Name.Constant */ +.highlight .nd { color: #5C35CC; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #CE5C00 } /* Name.Entity */ +.highlight .ne { color: #C00; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #000 } /* Name.Function */ +.highlight .nl { color: #F57900 } /* Name.Label */ +.highlight .nn { color: #000 } /* Name.Namespace */ +.highlight .nx { color: #000 } /* Name.Other */ +.highlight .py { color: #000 } /* Name.Property */ +.highlight .nt { color: #204A87; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #000 } /* Name.Variable */ +.highlight .ow { color: #204A87; font-weight: bold } /* Operator.Word */ +.highlight .pm { color: #000; font-weight: bold } /* Punctuation.Marker */ +.highlight .w { color: #F8F8F8 } /* Text.Whitespace */ +.highlight .mb { color: #0000CF; font-weight: bold } /* Literal.Number.Bin */ +.highlight .mf { color: #0000CF; font-weight: bold } /* Literal.Number.Float */ +.highlight .mh { color: #0000CF; font-weight: bold } /* Literal.Number.Hex */ +.highlight .mi { color: #0000CF; font-weight: bold } /* Literal.Number.Integer */ +.highlight .mo { color: #0000CF; font-weight: bold } /* Literal.Number.Oct */ +.highlight .sa { color: #4E9A06 } /* Literal.String.Affix */ +.highlight .sb { color: #4E9A06 } /* Literal.String.Backtick */ +.highlight .sc { color: #4E9A06 } /* Literal.String.Char */ +.highlight .dl { color: #4E9A06 } /* Literal.String.Delimiter */ +.highlight .sd { color: #8F5902; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4E9A06 } /* Literal.String.Double */ +.highlight .se { color: #4E9A06 } /* Literal.String.Escape */ +.highlight .sh { color: #4E9A06 } /* 
Literal.String.Heredoc */ +.highlight .si { color: #4E9A06 } /* Literal.String.Interpol */ +.highlight .sx { color: #4E9A06 } /* Literal.String.Other */ +.highlight .sr { color: #4E9A06 } /* Literal.String.Regex */ +.highlight .s1 { color: #4E9A06 } /* Literal.String.Single */ +.highlight .ss { color: #4E9A06 } /* Literal.String.Symbol */ +.highlight .bp { color: #3465A4 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #000 } /* Name.Function.Magic */ +.highlight .vc { color: #000 } /* Name.Variable.Class */ +.highlight .vg { color: #000 } /* Name.Variable.Global */ +.highlight .vi { color: #000 } /* Name.Variable.Instance */ +.highlight .vm { color: #000 } /* Name.Variable.Magic */ +.highlight .il { color: #0000CF; font-weight: bold } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/_static/scripts/pydata-sphinx-theme.js b/_static/scripts/pydata-sphinx-theme.js new file mode 100644 index 000000000..0e00c4cad --- /dev/null +++ b/_static/scripts/pydata-sphinx-theme.js @@ -0,0 +1,32 @@ +!function(t){var e={};function n(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return t[i].call(o.exports,o,o.exports,n),o.l=!0,o.exports}n.m=t,n.c=e,n.d=function(t,e,i){n.o(t,e)||Object.defineProperty(t,e,{enumerable:!0,get:i})},n.r=function(t){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})},n.t=function(t,e){if(1&e&&(t=n(t)),8&e)return t;if(4&e&&"object"==typeof t&&t&&t.__esModule)return t;var i=Object.create(null);if(n.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:t}),2&e&&"string"!=typeof t)for(var o in t)n.d(i,o,function(e){return t[e]}.bind(null,o));return i},n.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return n.d(e,"a",e),e},n.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},n.p="",n(n.s=2)}([function(t,e){t.exports=jQuery},function(t,e,n){"use 
strict";n.r(e),function(t){ +/**! + * @fileOverview Kickass library to create and place poppers near their reference elements. + * @version 1.16.1 + * @license + * Copyright (c) 2016 Federico Zivolo and contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +var n="undefined"!=typeof window&&"undefined"!=typeof document&&"undefined"!=typeof navigator,i=function(){for(var t=["Edge","Trident","Firefox"],e=0;e=0)return 1;return 0}();var o=n&&window.Promise?function(t){var e=!1;return function(){e||(e=!0,window.Promise.resolve().then((function(){e=!1,t()})))}}:function(t){var e=!1;return function(){e||(e=!0,setTimeout((function(){e=!1,t()}),i))}};function r(t){return t&&"[object Function]"==={}.toString.call(t)}function a(t,e){if(1!==t.nodeType)return[];var n=t.ownerDocument.defaultView.getComputedStyle(t,null);return e?n[e]:n}function s(t){return"HTML"===t.nodeName?t:t.parentNode||t.host}function l(t){if(!t)return document.body;switch(t.nodeName){case"HTML":case"BODY":return t.ownerDocument.body;case"#document":return t.body}var e=a(t),n=e.overflow,i=e.overflowX,o=e.overflowY;return/(auto|scroll|overlay)/.test(n+o+i)?t:l(s(t))}function u(t){return t&&t.referenceNode?t.referenceNode:t}var f=n&&!(!window.MSInputMethodContext||!document.documentMode),d=n&&/MSIE 10/.test(navigator.userAgent);function c(t){return 11===t?f:10===t?d:f||d}function h(t){if(!t)return document.documentElement;for(var e=c(10)?document.body:null,n=t.offsetParent||null;n===e&&t.nextElementSibling;)n=(t=t.nextElementSibling).offsetParent;var i=n&&n.nodeName;return i&&"BODY"!==i&&"HTML"!==i?-1!==["TH","TD","TABLE"].indexOf(n.nodeName)&&"static"===a(n,"position")?h(n):n:t?t.ownerDocument.documentElement:document.documentElement}function p(t){return null!==t.parentNode?p(t.parentNode):t}function m(t,e){if(!(t&&t.nodeType&&e&&e.nodeType))return document.documentElement;var n=t.compareDocumentPosition(e)&Node.DOCUMENT_POSITION_FOLLOWING,i=n?t:e,o=n?e:t,r=document.createRange();r.setStart(i,0),r.setEnd(o,0);var a,s,l=r.commonAncestorContainer;if(t!==l&&e!==l||i.contains(o))return"BODY"===(s=(a=l).nodeName)||"HTML"!==s&&h(a.firstElementChild)!==a?h(l):l;var u=p(t);return u.host?m(u.host,e):m(t,p(e).host)}function g(t){var e=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:"top",n="top"===e?"scrollTop":"scrollLeft",i=t.nodeName;if("BODY"===i||"HTML"===i){var o=t.ownerDocument.documentElement,r=t.ownerDocument.scrollingElement||o;return r[n]}return t[n]}function v(t,e){var n=arguments.length>2&&void 0!==arguments[2]&&arguments[2],i=g(e,"top"),o=g(e,"left"),r=n?-1:1;return t.top+=i*r,t.bottom+=i*r,t.left+=o*r,t.right+=o*r,t}function _(t,e){var n="x"===e?"Left":"Top",i="Left"===n?"Right":"Bottom";return parseFloat(t["border"+n+"Width"])+parseFloat(t["border"+i+"Width"])}function b(t,e,n,i){return Math.max(e["offset"+t],e["scroll"+t],n["client"+t],n["offset"+t],n["scroll"+t],c(10)?parseInt(n["offset"+t])+parseInt(i["margin"+("Height"===t?"Top":"Left")])+parseInt(i["margin"+("Height"===t?"Bottom":"Right")]):0)}function y(t){var e=t.body,n=t.documentElement,i=c(10)&&getComputedStyle(n);return{height:b("Height",e,n,i),width:b("Width",e,n,i)}}var w=function(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")},E=function(){function t(t,e){for(var n=0;n2&&void 0!==arguments[2]&&arguments[2],i=c(10),o="HTML"===e.nodeName,r=N(t),s=N(e),u=l(t),f=a(e),d=parseFloat(f.borderTopWidth),h=parseFloat(f.borderLeftWidth);n&&o&&(s.top=Math.max(s.top,0),s.left=Math.max(s.left,0));var p=S({top:r.top-s.top-d,left:r.left-s.left-h,width:r.width,height:r.height});if(p.marginTop=0,p.marginLeft=0,!i&&o){var m=parseFloat(f.marginTop),g=parseFloat(f.marginLeft);p.top-=d-m,p.bottom-=d-m,p.left-=h-g,p.right-=h-g,p.marginTop=m,p.marginLeft=g}return(i&&!n?e.contains(u):e===u&&"BODY"!==u.nodeName)&&(p=v(p,e)),p}function k(t){var e=arguments.length>1&&void 0!==arguments[1]&&arguments[1],n=t.ownerDocument.documentElement,i=D(t,n),o=Math.max(n.clientWidth,window.innerWidth||0),r=Math.max(n.clientHeight,window.innerHeight||0),a=e?0:g(n),s=e?0:g(n,"left"),l={top:a-i.top+i.marginTop,left:s-i.left+i.marginLeft,width:o,height:r};return S(l)}function A(t){var 
e=t.nodeName;if("BODY"===e||"HTML"===e)return!1;if("fixed"===a(t,"position"))return!0;var n=s(t);return!!n&&A(n)}function I(t){if(!t||!t.parentElement||c())return document.documentElement;for(var e=t.parentElement;e&&"none"===a(e,"transform");)e=e.parentElement;return e||document.documentElement}function O(t,e,n,i){var o=arguments.length>4&&void 0!==arguments[4]&&arguments[4],r={top:0,left:0},a=o?I(t):m(t,u(e));if("viewport"===i)r=k(a,o);else{var f=void 0;"scrollParent"===i?"BODY"===(f=l(s(e))).nodeName&&(f=t.ownerDocument.documentElement):f="window"===i?t.ownerDocument.documentElement:i;var d=D(f,a,o);if("HTML"!==f.nodeName||A(a))r=d;else{var c=y(t.ownerDocument),h=c.height,p=c.width;r.top+=d.top-d.marginTop,r.bottom=h+d.top,r.left+=d.left-d.marginLeft,r.right=p+d.left}}var g="number"==typeof(n=n||0);return r.left+=g?n:n.left||0,r.top+=g?n:n.top||0,r.right-=g?n:n.right||0,r.bottom-=g?n:n.bottom||0,r}function x(t){return t.width*t.height}function j(t,e,n,i,o){var r=arguments.length>5&&void 0!==arguments[5]?arguments[5]:0;if(-1===t.indexOf("auto"))return t;var a=O(n,i,r,o),s={top:{width:a.width,height:e.top-a.top},right:{width:a.right-e.right,height:a.height},bottom:{width:a.width,height:a.bottom-e.bottom},left:{width:e.left-a.left,height:a.height}},l=Object.keys(s).map((function(t){return C({key:t},s[t],{area:x(s[t])})})).sort((function(t,e){return e.area-t.area})),u=l.filter((function(t){var e=t.width,i=t.height;return e>=n.clientWidth&&i>=n.clientHeight})),f=u.length>0?u[0].key:l[0].key,d=t.split("-")[1];return f+(d?"-"+d:"")}function L(t,e,n){var i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:null,o=i?I(e):m(e,u(n));return D(n,o,i)}function P(t){var e=t.ownerDocument.defaultView.getComputedStyle(t),n=parseFloat(e.marginTop||0)+parseFloat(e.marginBottom||0),i=parseFloat(e.marginLeft||0)+parseFloat(e.marginRight||0);return{width:t.offsetWidth+i,height:t.offsetHeight+n}}function F(t){var e={left:"right",right:"left",bottom:"top",top:"bottom"};return 
t.replace(/left|right|bottom|top/g,(function(t){return e[t]}))}function R(t,e,n){n=n.split("-")[0];var i=P(t),o={width:i.width,height:i.height},r=-1!==["right","left"].indexOf(n),a=r?"top":"left",s=r?"left":"top",l=r?"height":"width",u=r?"width":"height";return o[a]=e[a]+e[l]/2-i[l]/2,o[s]=n===s?e[s]-i[u]:e[F(s)],o}function M(t,e){return Array.prototype.find?t.find(e):t.filter(e)[0]}function B(t,e,n){return(void 0===n?t:t.slice(0,function(t,e,n){if(Array.prototype.findIndex)return t.findIndex((function(t){return t[e]===n}));var i=M(t,(function(t){return t[e]===n}));return t.indexOf(i)}(t,"name",n))).forEach((function(t){t.function&&console.warn("`modifier.function` is deprecated, use `modifier.fn`!");var n=t.function||t.fn;t.enabled&&r(n)&&(e.offsets.popper=S(e.offsets.popper),e.offsets.reference=S(e.offsets.reference),e=n(e,t))})),e}function H(){if(!this.state.isDestroyed){var t={instance:this,styles:{},arrowStyles:{},attributes:{},flipped:!1,offsets:{}};t.offsets.reference=L(this.state,this.popper,this.reference,this.options.positionFixed),t.placement=j(this.options.placement,t.offsets.reference,this.popper,this.reference,this.options.modifiers.flip.boundariesElement,this.options.modifiers.flip.padding),t.originalPlacement=t.placement,t.positionFixed=this.options.positionFixed,t.offsets.popper=R(this.popper,t.offsets.reference,t.placement),t.offsets.popper.position=this.options.positionFixed?"fixed":"absolute",t=B(this.modifiers,t),this.state.isCreated?this.options.onUpdate(t):(this.state.isCreated=!0,this.options.onCreate(t))}}function q(t,e){return t.some((function(t){var n=t.name;return t.enabled&&n===e}))}function Q(t){for(var e=[!1,"ms","Webkit","Moz","O"],n=t.charAt(0).toUpperCase()+t.slice(1),i=0;i1&&void 0!==arguments[1]&&arguments[1],n=Z.indexOf(t),i=Z.slice(n+1).concat(Z.slice(0,n));return e?i.reverse():i}var et="flip",nt="clockwise",it="counterclockwise";function ot(t,e,n,i){var 
o=[0,0],r=-1!==["right","left"].indexOf(i),a=t.split(/(\+|\-)/).map((function(t){return t.trim()})),s=a.indexOf(M(a,(function(t){return-1!==t.search(/,|\s/)})));a[s]&&-1===a[s].indexOf(",")&&console.warn("Offsets separated by white space(s) are deprecated, use a comma (,) instead.");var l=/\s*,\s*|\s+/,u=-1!==s?[a.slice(0,s).concat([a[s].split(l)[0]]),[a[s].split(l)[1]].concat(a.slice(s+1))]:[a];return(u=u.map((function(t,i){var o=(1===i?!r:r)?"height":"width",a=!1;return t.reduce((function(t,e){return""===t[t.length-1]&&-1!==["+","-"].indexOf(e)?(t[t.length-1]=e,a=!0,t):a?(t[t.length-1]+=e,a=!1,t):t.concat(e)}),[]).map((function(t){return function(t,e,n,i){var o=t.match(/((?:\-|\+)?\d*\.?\d*)(.*)/),r=+o[1],a=o[2];if(!r)return t;if(0===a.indexOf("%")){var s=void 0;switch(a){case"%p":s=n;break;case"%":case"%r":default:s=i}return S(s)[e]/100*r}if("vh"===a||"vw"===a){return("vh"===a?Math.max(document.documentElement.clientHeight,window.innerHeight||0):Math.max(document.documentElement.clientWidth,window.innerWidth||0))/100*r}return r}(t,o,e,n)}))}))).forEach((function(t,e){t.forEach((function(n,i){K(n)&&(o[e]+=n*("-"===t[i-1]?-1:1))}))})),o}var rt={placement:"bottom",positionFixed:!1,eventsEnabled:!0,removeOnDestroy:!1,onCreate:function(){},onUpdate:function(){},modifiers:{shift:{order:100,enabled:!0,fn:function(t){var e=t.placement,n=e.split("-")[0],i=e.split("-")[1];if(i){var o=t.offsets,r=o.reference,a=o.popper,s=-1!==["bottom","top"].indexOf(n),l=s?"left":"top",u=s?"width":"height",f={start:T({},l,r[l]),end:T({},l,r[l]+r[u]-a[u])};t.offsets.popper=C({},a,f[i])}return t}},offset:{order:200,enabled:!0,fn:function(t,e){var n=e.offset,i=t.placement,o=t.offsets,r=o.popper,a=o.reference,s=i.split("-")[0],l=void 0;return 
l=K(+n)?[+n,0]:ot(n,r,a,s),"left"===s?(r.top+=l[0],r.left-=l[1]):"right"===s?(r.top+=l[0],r.left+=l[1]):"top"===s?(r.left+=l[0],r.top-=l[1]):"bottom"===s&&(r.left+=l[0],r.top+=l[1]),t.popper=r,t},offset:0},preventOverflow:{order:300,enabled:!0,fn:function(t,e){var n=e.boundariesElement||h(t.instance.popper);t.instance.reference===n&&(n=h(n));var i=Q("transform"),o=t.instance.popper.style,r=o.top,a=o.left,s=o[i];o.top="",o.left="",o[i]="";var l=O(t.instance.popper,t.instance.reference,e.padding,n,t.positionFixed);o.top=r,o.left=a,o[i]=s,e.boundaries=l;var u=e.priority,f=t.offsets.popper,d={primary:function(t){var n=f[t];return f[t]l[t]&&!e.escapeWithReference&&(i=Math.min(f[n],l[t]-("right"===t?f.width:f.height))),T({},n,i)}};return u.forEach((function(t){var e=-1!==["left","top"].indexOf(t)?"primary":"secondary";f=C({},f,d[e](t))})),t.offsets.popper=f,t},priority:["left","right","top","bottom"],padding:5,boundariesElement:"scrollParent"},keepTogether:{order:400,enabled:!0,fn:function(t){var e=t.offsets,n=e.popper,i=e.reference,o=t.placement.split("-")[0],r=Math.floor,a=-1!==["top","bottom"].indexOf(o),s=a?"right":"bottom",l=a?"left":"top",u=a?"width":"height";return n[s]r(i[s])&&(t.offsets.popper[l]=r(i[s])),t}},arrow:{order:500,enabled:!0,fn:function(t,e){var n;if(!G(t.instance.modifiers,"arrow","keepTogether"))return t;var i=e.element;if("string"==typeof i){if(!(i=t.instance.popper.querySelector(i)))return t}else if(!t.instance.popper.contains(i))return console.warn("WARNING: `arrow.element` must be child of its popper element!"),t;var o=t.placement.split("-")[0],r=t.offsets,s=r.popper,l=r.reference,u=-1!==["left","right"].indexOf(o),f=u?"height":"width",d=u?"Top":"Left",c=d.toLowerCase(),h=u?"left":"top",p=u?"bottom":"right",m=P(i)[f];l[p]-ms[p]&&(t.offsets.popper[c]+=l[c]+m-s[p]),t.offsets.popper=S(t.offsets.popper);var g=l[c]+l[f]/2-m/2,v=a(t.instance.popper),_=parseFloat(v["margin"+d]),b=parseFloat(v["border"+d+"Width"]),y=g-t.offsets.popper[c]-_-b;return 
y=Math.max(Math.min(s[f]-m,y),0),t.arrowElement=i,t.offsets.arrow=(T(n={},c,Math.round(y)),T(n,h,""),n),t},element:"[x-arrow]"},flip:{order:600,enabled:!0,fn:function(t,e){if(q(t.instance.modifiers,"inner"))return t;if(t.flipped&&t.placement===t.originalPlacement)return t;var n=O(t.instance.popper,t.instance.reference,e.padding,e.boundariesElement,t.positionFixed),i=t.placement.split("-")[0],o=F(i),r=t.placement.split("-")[1]||"",a=[];switch(e.behavior){case et:a=[i,o];break;case nt:a=tt(i);break;case it:a=tt(i,!0);break;default:a=e.behavior}return a.forEach((function(s,l){if(i!==s||a.length===l+1)return t;i=t.placement.split("-")[0],o=F(i);var u=t.offsets.popper,f=t.offsets.reference,d=Math.floor,c="left"===i&&d(u.right)>d(f.left)||"right"===i&&d(u.left)d(f.top)||"bottom"===i&&d(u.top)d(n.right),m=d(u.top)d(n.bottom),v="left"===i&&h||"right"===i&&p||"top"===i&&m||"bottom"===i&&g,_=-1!==["top","bottom"].indexOf(i),b=!!e.flipVariations&&(_&&"start"===r&&h||_&&"end"===r&&p||!_&&"start"===r&&m||!_&&"end"===r&&g),y=!!e.flipVariationsByContent&&(_&&"start"===r&&p||_&&"end"===r&&h||!_&&"start"===r&&g||!_&&"end"===r&&m),w=b||y;(c||v||w)&&(t.flipped=!0,(c||v)&&(i=a[l+1]),w&&(r=function(t){return"end"===t?"start":"start"===t?"end":t}(r)),t.placement=i+(r?"-"+r:""),t.offsets.popper=C({},t.offsets.popper,R(t.instance.popper,t.offsets.reference,t.placement)),t=B(t.instance.modifiers,t,"flip"))})),t},behavior:"flip",padding:5,boundariesElement:"viewport",flipVariations:!1,flipVariationsByContent:!1},inner:{order:700,enabled:!1,fn:function(t){var e=t.placement,n=e.split("-")[0],i=t.offsets,o=i.popper,r=i.reference,a=-1!==["left","right"].indexOf(n),s=-1===["top","left"].indexOf(n);return o[a?"left":"top"]=r[n]-(s?o[a?"width":"height"]:0),t.placement=F(e),t.offsets.popper=S(o),t}},hide:{order:800,enabled:!0,fn:function(t){if(!G(t.instance.modifiers,"hide","preventOverflow"))return t;var 
e=t.offsets.reference,n=M(t.instance.modifiers,(function(t){return"preventOverflow"===t.name})).boundaries;if(e.bottomn.right||e.top>n.bottom||e.right2&&void 0!==arguments[2]?arguments[2]:{};w(this,t),this.scheduleUpdate=function(){return requestAnimationFrame(i.update)},this.update=o(this.update.bind(this)),this.options=C({},t.Defaults,a),this.state={isDestroyed:!1,isCreated:!1,scrollParents:[]},this.reference=e&&e.jquery?e[0]:e,this.popper=n&&n.jquery?n[0]:n,this.options.modifiers={},Object.keys(C({},t.Defaults.modifiers,a.modifiers)).forEach((function(e){i.options.modifiers[e]=C({},t.Defaults.modifiers[e]||{},a.modifiers?a.modifiers[e]:{})})),this.modifiers=Object.keys(this.options.modifiers).map((function(t){return C({name:t},i.options.modifiers[t])})).sort((function(t,e){return t.order-e.order})),this.modifiers.forEach((function(t){t.enabled&&r(t.onLoad)&&t.onLoad(i.reference,i.popper,i.options,t,i.state)})),this.update();var s=this.options.eventsEnabled;s&&this.enableEventListeners(),this.state.eventsEnabled=s}return E(t,[{key:"update",value:function(){return H.call(this)}},{key:"destroy",value:function(){return W.call(this)}},{key:"enableEventListeners",value:function(){return Y.call(this)}},{key:"disableEventListeners",value:function(){return z.call(this)}}]),t}();at.Utils=("undefined"!=typeof window?window:t).PopperUtils,at.placements=J,at.Defaults=rt,e.default=at}.call(this,n(4))},function(t,e,n){t.exports=n(5)},function(t,e,n){ +/*! 
+ * Bootstrap v4.6.1 (https://getbootstrap.com/) + * Copyright 2011-2021 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors) + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */ +!function(t,e,n){"use strict";function i(t){return t&&"object"==typeof t&&"default"in t?t:{default:t}}var o=i(e),r=i(n);function a(t,e){for(var n=0;n=4)throw new Error("Bootstrap's JavaScript requires at least jQuery v1.9.1 but less than v4.0.0")}};d.jQueryDetection(),o.default.fn.emulateTransitionEnd=f,o.default.event.special[d.TRANSITION_END]={bindType:"transitionend",delegateType:"transitionend",handle:function(t){if(o.default(t.target).is(this))return t.handleObj.handler.apply(this,arguments)}};var c=o.default.fn.alert,h=function(){function t(t){this._element=t}var e=t.prototype;return e.close=function(t){var e=this._element;t&&(e=this._getRootElement(t)),this._triggerCloseEvent(e).isDefaultPrevented()||this._removeElement(e)},e.dispose=function(){o.default.removeData(this._element,"bs.alert"),this._element=null},e._getRootElement=function(t){var e=d.getSelectorFromElement(t),n=!1;return e&&(n=document.querySelector(e)),n||(n=o.default(t).closest(".alert")[0]),n},e._triggerCloseEvent=function(t){var e=o.default.Event("close.bs.alert");return o.default(t).trigger(e),e},e._removeElement=function(t){var e=this;if(o.default(t).removeClass("show"),o.default(t).hasClass("fade")){var n=d.getTransitionDurationFromElement(t);o.default(t).one(d.TRANSITION_END,(function(n){return e._destroyElement(t,n)})).emulateTransitionEnd(n)}else this._destroyElement(t)},e._destroyElement=function(t){o.default(t).detach().trigger("closed.bs.alert").remove()},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this),i=n.data("bs.alert");i||(i=new t(this),n.data("bs.alert",i)),"close"===e&&i[e](this)}))},t._handleDismiss=function(t){return 
function(e){e&&e.preventDefault(),t.close(this)}},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}}]),t}();o.default(document).on("click.bs.alert.data-api",'[data-dismiss="alert"]',h._handleDismiss(new h)),o.default.fn.alert=h._jQueryInterface,o.default.fn.alert.Constructor=h,o.default.fn.alert.noConflict=function(){return o.default.fn.alert=c,h._jQueryInterface};var p=o.default.fn.button,m=function(){function t(t){this._element=t,this.shouldAvoidTriggerChange=!1}var e=t.prototype;return e.toggle=function(){var t=!0,e=!0,n=o.default(this._element).closest('[data-toggle="buttons"]')[0];if(n){var i=this._element.querySelector('input:not([type="hidden"])');if(i){if("radio"===i.type)if(i.checked&&this._element.classList.contains("active"))t=!1;else{var r=n.querySelector(".active");r&&o.default(r).removeClass("active")}t&&("checkbox"!==i.type&&"radio"!==i.type||(i.checked=!this._element.classList.contains("active")),this.shouldAvoidTriggerChange||o.default(i).trigger("change")),i.focus(),e=!1}}this._element.hasAttribute("disabled")||this._element.classList.contains("disabled")||(e&&this._element.setAttribute("aria-pressed",!this._element.classList.contains("active")),t&&o.default(this._element).toggleClass("active"))},e.dispose=function(){o.default.removeData(this._element,"bs.button"),this._element=null},t._jQueryInterface=function(e,n){return this.each((function(){var i=o.default(this),r=i.data("bs.button");r||(r=new t(this),i.data("bs.button",r)),r.shouldAvoidTriggerChange=n,"toggle"===e&&r[e]()}))},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}}]),t}();o.default(document).on("click.bs.button.data-api",'[data-toggle^="button"]',(function(t){var e=t.target,n=e;if(o.default(e).hasClass("btn")||(e=o.default(e).closest(".btn")[0]),!e||e.hasAttribute("disabled")||e.classList.contains("disabled"))t.preventDefault();else{var i=e.querySelector('input:not([type="hidden"])');if(i&&(i.hasAttribute("disabled")||i.classList.contains("disabled")))return void 
t.preventDefault();"INPUT"!==n.tagName&&"LABEL"===e.tagName||m._jQueryInterface.call(o.default(e),"toggle","INPUT"===n.tagName)}})).on("focus.bs.button.data-api blur.bs.button.data-api",'[data-toggle^="button"]',(function(t){var e=o.default(t.target).closest(".btn")[0];o.default(e).toggleClass("focus",/^focus(in)?$/.test(t.type))})),o.default(window).on("load.bs.button.data-api",(function(){for(var t=[].slice.call(document.querySelectorAll('[data-toggle="buttons"] .btn')),e=0,n=t.length;e0,this._pointerEvent=Boolean(window.PointerEvent||window.MSPointerEvent),this._addEventListeners()}var e=t.prototype;return e.next=function(){this._isSliding||this._slide("next")},e.nextWhenVisible=function(){var t=o.default(this._element);!document.hidden&&t.is(":visible")&&"hidden"!==t.css("visibility")&&this.next()},e.prev=function(){this._isSliding||this._slide("prev")},e.pause=function(t){t||(this._isPaused=!0),this._element.querySelector(".carousel-item-next, .carousel-item-prev")&&(d.triggerTransitionEnd(this._element),this.cycle(!0)),clearInterval(this._interval),this._interval=null},e.cycle=function(t){t||(this._isPaused=!1),this._interval&&(clearInterval(this._interval),this._interval=null),this._config.interval&&!this._isPaused&&(this._updateInterval(),this._interval=setInterval((document.visibilityState?this.nextWhenVisible:this.next).bind(this),this._config.interval))},e.to=function(t){var e=this;this._activeElement=this._element.querySelector(".active.carousel-item");var n=this._getItemIndex(this._activeElement);if(!(t>this._items.length-1||t<0))if(this._isSliding)o.default(this._element).one("slid.bs.carousel",(function(){return e.to(t)}));else{if(n===t)return this.pause(),void this.cycle();var 
i=t>n?"next":"prev";this._slide(i,this._items[t])}},e.dispose=function(){o.default(this._element).off(v),o.default.removeData(this._element,"bs.carousel"),this._items=null,this._config=null,this._element=null,this._interval=null,this._isPaused=null,this._isSliding=null,this._activeElement=null,this._indicatorsElement=null},e._getConfig=function(t){return t=l({},b,t),d.typeCheckConfig(g,t,y),t},e._handleSwipe=function(){var t=Math.abs(this.touchDeltaX);if(!(t<=40)){var e=t/this.touchDeltaX;this.touchDeltaX=0,e>0&&this.prev(),e<0&&this.next()}},e._addEventListeners=function(){var t=this;this._config.keyboard&&o.default(this._element).on("keydown.bs.carousel",(function(e){return t._keydown(e)})),"hover"===this._config.pause&&o.default(this._element).on("mouseenter.bs.carousel",(function(e){return t.pause(e)})).on("mouseleave.bs.carousel",(function(e){return t.cycle(e)})),this._config.touch&&this._addTouchEventListeners()},e._addTouchEventListeners=function(){var t=this;if(this._touchSupported){var e=function(e){t._pointerEvent&&w[e.originalEvent.pointerType.toUpperCase()]?t.touchStartX=e.originalEvent.clientX:t._pointerEvent||(t.touchStartX=e.originalEvent.touches[0].clientX)},n=function(e){t._pointerEvent&&w[e.originalEvent.pointerType.toUpperCase()]&&(t.touchDeltaX=e.originalEvent.clientX-t.touchStartX),t._handleSwipe(),"hover"===t._config.pause&&(t.pause(),t.touchTimeout&&clearTimeout(t.touchTimeout),t.touchTimeout=setTimeout((function(e){return t.cycle(e)}),500+t._config.interval))};o.default(this._element.querySelectorAll(".carousel-item img")).on("dragstart.bs.carousel",(function(t){return t.preventDefault()})),this._pointerEvent?(o.default(this._element).on("pointerdown.bs.carousel",(function(t){return e(t)})),o.default(this._element).on("pointerup.bs.carousel",(function(t){return n(t)})),this._element.classList.add("pointer-event")):(o.default(this._element).on("touchstart.bs.carousel",(function(t){return 
e(t)})),o.default(this._element).on("touchmove.bs.carousel",(function(e){return function(e){t.touchDeltaX=e.originalEvent.touches&&e.originalEvent.touches.length>1?0:e.originalEvent.touches[0].clientX-t.touchStartX}(e)})),o.default(this._element).on("touchend.bs.carousel",(function(t){return n(t)})))}},e._keydown=function(t){if(!/input|textarea/i.test(t.target.tagName))switch(t.which){case 37:t.preventDefault(),this.prev();break;case 39:t.preventDefault(),this.next()}},e._getItemIndex=function(t){return this._items=t&&t.parentNode?[].slice.call(t.parentNode.querySelectorAll(".carousel-item")):[],this._items.indexOf(t)},e._getItemByDirection=function(t,e){var n="next"===t,i="prev"===t,o=this._getItemIndex(e),r=this._items.length-1;if((i&&0===o||n&&o===r)&&!this._config.wrap)return e;var a=(o+("prev"===t?-1:1))%this._items.length;return-1===a?this._items[this._items.length-1]:this._items[a]},e._triggerSlideEvent=function(t,e){var n=this._getItemIndex(t),i=this._getItemIndex(this._element.querySelector(".active.carousel-item")),r=o.default.Event("slide.bs.carousel",{relatedTarget:t,direction:e,from:i,to:n});return o.default(this._element).trigger(r),r},e._setActiveIndicatorElement=function(t){if(this._indicatorsElement){var e=[].slice.call(this._indicatorsElement.querySelectorAll(".active"));o.default(e).removeClass("active");var n=this._indicatorsElement.children[this._getItemIndex(t)];n&&o.default(n).addClass("active")}},e._updateInterval=function(){var t=this._activeElement||this._element.querySelector(".active.carousel-item");if(t){var e=parseInt(t.getAttribute("data-interval"),10);e?(this._config.defaultInterval=this._config.defaultInterval||this._config.interval,this._config.interval=e):this._config.interval=this._config.defaultInterval||this._config.interval}},e._slide=function(t,e){var 
n,i,r,a=this,s=this._element.querySelector(".active.carousel-item"),l=this._getItemIndex(s),u=e||s&&this._getItemByDirection(t,s),f=this._getItemIndex(u),c=Boolean(this._interval);if("next"===t?(n="carousel-item-left",i="carousel-item-next",r="left"):(n="carousel-item-right",i="carousel-item-prev",r="right"),u&&o.default(u).hasClass("active"))this._isSliding=!1;else if(!this._triggerSlideEvent(u,r).isDefaultPrevented()&&s&&u){this._isSliding=!0,c&&this.pause(),this._setActiveIndicatorElement(u),this._activeElement=u;var h=o.default.Event("slid.bs.carousel",{relatedTarget:u,direction:r,from:l,to:f});if(o.default(this._element).hasClass("slide")){o.default(u).addClass(i),d.reflow(u),o.default(s).addClass(n),o.default(u).addClass(n);var p=d.getTransitionDurationFromElement(s);o.default(s).one(d.TRANSITION_END,(function(){o.default(u).removeClass(n+" "+i).addClass("active"),o.default(s).removeClass("active "+i+" "+n),a._isSliding=!1,setTimeout((function(){return o.default(a._element).trigger(h)}),0)})).emulateTransitionEnd(p)}else o.default(s).removeClass("active"),o.default(u).addClass("active"),this._isSliding=!1,o.default(this._element).trigger(h);c&&this.cycle()}},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this).data("bs.carousel"),i=l({},b,o.default(this).data());"object"==typeof e&&(i=l({},i,e));var r="string"==typeof e?e:i.slide;if(n||(n=new t(this,i),o.default(this).data("bs.carousel",n)),"number"==typeof e)n.to(e);else if("string"==typeof r){if(void 0===n[r])throw new TypeError('No method named "'+r+'"');n[r]()}else i.interval&&i.ride&&(n.pause(),n.cycle())}))},t._dataApiClickHandler=function(e){var n=d.getSelectorFromElement(this);if(n){var i=o.default(n)[0];if(i&&o.default(i).hasClass("carousel")){var 
r=l({},o.default(i).data(),o.default(this).data()),a=this.getAttribute("data-slide-to");a&&(r.interval=!1),t._jQueryInterface.call(o.default(i),r),a&&o.default(i).data("bs.carousel").to(a),e.preventDefault()}}},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}},{key:"Default",get:function(){return b}}]),t}();o.default(document).on("click.bs.carousel.data-api","[data-slide], [data-slide-to]",E._dataApiClickHandler),o.default(window).on("load.bs.carousel.data-api",(function(){for(var t=[].slice.call(document.querySelectorAll('[data-ride="carousel"]')),e=0,n=t.length;e0&&(this._selector=a,this._triggerArray.push(r))}this._parent=this._config.parent?this._getParent():null,this._config.parent||this._addAriaAndCollapsedClass(this._element,this._triggerArray),this._config.toggle&&this.toggle()}var e=t.prototype;return e.toggle=function(){o.default(this._element).hasClass("show")?this.hide():this.show()},e.show=function(){var e,n,i=this;if(!(this._isTransitioning||o.default(this._element).hasClass("show")||(this._parent&&0===(e=[].slice.call(this._parent.querySelectorAll(".show, .collapsing")).filter((function(t){return"string"==typeof i._config.parent?t.getAttribute("data-parent")===i._config.parent:t.classList.contains("collapse")}))).length&&(e=null),e&&(n=o.default(e).not(this._selector).data("bs.collapse"))&&n._isTransitioning))){var r=o.default.Event("show.bs.collapse");if(o.default(this._element).trigger(r),!r.isDefaultPrevented()){e&&(t._jQueryInterface.call(o.default(e).not(this._selector),"hide"),n||o.default(e).data("bs.collapse",null));var a=this._getDimension();o.default(this._element).removeClass("collapse").addClass("collapsing"),this._element.style[a]=0,this._triggerArray.length&&o.default(this._triggerArray).removeClass("collapsed").attr("aria-expanded",!0),this.setTransitioning(!0);var 
s="scroll"+(a[0].toUpperCase()+a.slice(1)),l=d.getTransitionDurationFromElement(this._element);o.default(this._element).one(d.TRANSITION_END,(function(){o.default(i._element).removeClass("collapsing").addClass("collapse show"),i._element.style[a]="",i.setTransitioning(!1),o.default(i._element).trigger("shown.bs.collapse")})).emulateTransitionEnd(l),this._element.style[a]=this._element[s]+"px"}}},e.hide=function(){var t=this;if(!this._isTransitioning&&o.default(this._element).hasClass("show")){var e=o.default.Event("hide.bs.collapse");if(o.default(this._element).trigger(e),!e.isDefaultPrevented()){var n=this._getDimension();this._element.style[n]=this._element.getBoundingClientRect()[n]+"px",d.reflow(this._element),o.default(this._element).addClass("collapsing").removeClass("collapse show");var i=this._triggerArray.length;if(i>0)for(var r=0;r0},e._getOffset=function(){var t=this,e={};return"function"==typeof this._config.offset?e.fn=function(e){return e.offsets=l({},e.offsets,t._config.offset(e.offsets,t._element)),e}:e.offset=this._config.offset,e},e._getPopperConfig=function(){var t={placement:this._getPlacement(),modifiers:{offset:this._getOffset(),flip:{enabled:this._config.flip},preventOverflow:{boundariesElement:this._config.boundary}}};return"static"===this._config.display&&(t.modifiers.applyStyle={enabled:!1}),l({},t,this._config.popperConfig)},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this).data("bs.dropdown");if(n||(n=new t(this,"object"==typeof e?e:null),o.default(this).data("bs.dropdown",n)),"string"==typeof e){if(void 0===n[e])throw new TypeError('No method named "'+e+'"');n[e]()}}))},t._clearMenus=function(e){if(!e||3!==e.which&&("keyup"!==e.type||9===e.which))for(var n=[].slice.call(document.querySelectorAll('[data-toggle="dropdown"]')),i=0,r=n.length;i0&&a--,40===e.which&&adocument.documentElement.clientHeight;n||(this._element.style.overflowY="hidden"),this._element.classList.add("modal-static");var 
i=d.getTransitionDurationFromElement(this._dialog);o.default(this._element).off(d.TRANSITION_END),o.default(this._element).one(d.TRANSITION_END,(function(){t._element.classList.remove("modal-static"),n||o.default(t._element).one(d.TRANSITION_END,(function(){t._element.style.overflowY=""})).emulateTransitionEnd(t._element,i)})).emulateTransitionEnd(i),this._element.focus()}},e._showElement=function(t){var e=this,n=o.default(this._element).hasClass("fade"),i=this._dialog?this._dialog.querySelector(".modal-body"):null;this._element.parentNode&&this._element.parentNode.nodeType===Node.ELEMENT_NODE||document.body.appendChild(this._element),this._element.style.display="block",this._element.removeAttribute("aria-hidden"),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),o.default(this._dialog).hasClass("modal-dialog-scrollable")&&i?i.scrollTop=0:this._element.scrollTop=0,n&&d.reflow(this._element),o.default(this._element).addClass("show"),this._config.focus&&this._enforceFocus();var r=o.default.Event("shown.bs.modal",{relatedTarget:t}),a=function(){e._config.focus&&e._element.focus(),e._isTransitioning=!1,o.default(e._element).trigger(r)};if(n){var s=d.getTransitionDurationFromElement(this._dialog);o.default(this._dialog).one(d.TRANSITION_END,a).emulateTransitionEnd(s)}else a()},e._enforceFocus=function(){var t=this;o.default(document).off("focusin.bs.modal").on("focusin.bs.modal",(function(e){document!==e.target&&t._element!==e.target&&0===o.default(t._element).has(e.target).length&&t._element.focus()}))},e._setEscapeEvent=function(){var t=this;this._isShown?o.default(this._element).on("keydown.dismiss.bs.modal",(function(e){t._config.keyboard&&27===e.which?(e.preventDefault(),t.hide()):t._config.keyboard||27!==e.which||t._triggerBackdropTransition()})):this._isShown||o.default(this._element).off("keydown.dismiss.bs.modal")},e._setResizeEvent=function(){var 
t=this;this._isShown?o.default(window).on("resize.bs.modal",(function(e){return t.handleUpdate(e)})):o.default(window).off("resize.bs.modal")},e._hideModal=function(){var t=this;this._element.style.display="none",this._element.setAttribute("aria-hidden",!0),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._isTransitioning=!1,this._showBackdrop((function(){o.default(document.body).removeClass("modal-open"),t._resetAdjustments(),t._resetScrollbar(),o.default(t._element).trigger("hidden.bs.modal")}))},e._removeBackdrop=function(){this._backdrop&&(o.default(this._backdrop).remove(),this._backdrop=null)},e._showBackdrop=function(t){var e=this,n=o.default(this._element).hasClass("fade")?"fade":"";if(this._isShown&&this._config.backdrop){if(this._backdrop=document.createElement("div"),this._backdrop.className="modal-backdrop",n&&this._backdrop.classList.add(n),o.default(this._backdrop).appendTo(document.body),o.default(this._element).on("click.dismiss.bs.modal",(function(t){e._ignoreBackdropClick?e._ignoreBackdropClick=!1:t.target===t.currentTarget&&("static"===e._config.backdrop?e._triggerBackdropTransition():e.hide())})),n&&d.reflow(this._backdrop),o.default(this._backdrop).addClass("show"),!t)return;if(!n)return void t();var i=d.getTransitionDurationFromElement(this._backdrop);o.default(this._backdrop).one(d.TRANSITION_END,t).emulateTransitionEnd(i)}else if(!this._isShown&&this._backdrop){o.default(this._backdrop).removeClass("show");var r=function(){e._removeBackdrop(),t&&t()};if(o.default(this._element).hasClass("fade")){var a=d.getTransitionDurationFromElement(this._backdrop);o.default(this._backdrop).one(d.TRANSITION_END,r).emulateTransitionEnd(a)}else r()}else t&&t()},e._adjustDialog=function(){var 
t=this._element.scrollHeight>document.documentElement.clientHeight;!this._isBodyOverflowing&&t&&(this._element.style.paddingLeft=this._scrollbarWidth+"px"),this._isBodyOverflowing&&!t&&(this._element.style.paddingRight=this._scrollbarWidth+"px")},e._resetAdjustments=function(){this._element.style.paddingLeft="",this._element.style.paddingRight=""},e._checkScrollbar=function(){var t=document.body.getBoundingClientRect();this._isBodyOverflowing=Math.round(t.left+t.right)
',trigger:"hover focus",title:"",delay:0,html:!1,selector:!1,placement:"top",offset:0,container:!1,fallbackPlacement:"flip",boundary:"scrollParent",customClass:"",sanitize:!0,sanitizeFn:null,whiteList:B,popperConfig:null},X={animation:"boolean",template:"string",title:"(string|element|function)",trigger:"string",delay:"(number|object)",html:"boolean",selector:"(string|boolean)",placement:"(string|function)",offset:"(number|string|function)",container:"(string|element|boolean)",fallbackPlacement:"(string|array)",boundary:"(string|element)",customClass:"(string|function)",sanitize:"boolean",sanitizeFn:"(null|function)",whiteList:"object",popperConfig:"(null|object)"},$={HIDE:"hide.bs.tooltip",HIDDEN:"hidden.bs.tooltip",SHOW:"show.bs.tooltip",SHOWN:"shown.bs.tooltip",INSERTED:"inserted.bs.tooltip",CLICK:"click.bs.tooltip",FOCUSIN:"focusin.bs.tooltip",FOCUSOUT:"focusout.bs.tooltip",MOUSEENTER:"mouseenter.bs.tooltip",MOUSELEAVE:"mouseleave.bs.tooltip"},G=function(){function t(t,e){if(void 0===r.default)throw new TypeError("Bootstrap's tooltips require Popper (https://popper.js.org)");this._isEnabled=!0,this._timeout=0,this._hoverState="",this._activeTrigger={},this._popper=null,this.element=t,this.config=this._getConfig(e),this.tip=null,this._setListeners()}var e=t.prototype;return e.enable=function(){this._isEnabled=!0},e.disable=function(){this._isEnabled=!1},e.toggleEnabled=function(){this._isEnabled=!this._isEnabled},e.toggle=function(t){if(this._isEnabled)if(t){var e=this.constructor.DATA_KEY,n=o.default(t.currentTarget).data(e);n||(n=new this.constructor(t.currentTarget,this._getDelegateConfig()),o.default(t.currentTarget).data(e,n)),n._activeTrigger.click=!n._activeTrigger.click,n._isWithActiveTrigger()?n._enter(null,n):n._leave(null,n)}else{if(o.default(this.getTipElement()).hasClass("show"))return void 
this._leave(null,this);this._enter(null,this)}},e.dispose=function(){clearTimeout(this._timeout),o.default.removeData(this.element,this.constructor.DATA_KEY),o.default(this.element).off(this.constructor.EVENT_KEY),o.default(this.element).closest(".modal").off("hide.bs.modal",this._hideModalHandler),this.tip&&o.default(this.tip).remove(),this._isEnabled=null,this._timeout=null,this._hoverState=null,this._activeTrigger=null,this._popper&&this._popper.destroy(),this._popper=null,this.element=null,this.config=null,this.tip=null},e.show=function(){var t=this;if("none"===o.default(this.element).css("display"))throw new Error("Please use show on visible elements");var e=o.default.Event(this.constructor.Event.SHOW);if(this.isWithContent()&&this._isEnabled){o.default(this.element).trigger(e);var n=d.findShadowRoot(this.element),i=o.default.contains(null!==n?n:this.element.ownerDocument.documentElement,this.element);if(e.isDefaultPrevented()||!i)return;var a=this.getTipElement(),s=d.getUID(this.constructor.NAME);a.setAttribute("id",s),this.element.setAttribute("aria-describedby",s),this.setContent(),this.config.animation&&o.default(a).addClass("fade");var l="function"==typeof this.config.placement?this.config.placement.call(this,a,this.element):this.config.placement,u=this._getAttachment(l);this.addAttachmentClass(u);var f=this._getContainer();o.default(a).data(this.constructor.DATA_KEY,this),o.default.contains(this.element.ownerDocument.documentElement,this.tip)||o.default(a).appendTo(f),o.default(this.element).trigger(this.constructor.Event.INSERTED),this._popper=new r.default(this.element,a,this._getPopperConfig(u)),o.default(a).addClass("show"),o.default(a).addClass(this.config.customClass),"ontouchstart"in document.documentElement&&o.default(document.body).children().on("mouseover",null,o.default.noop);var c=function(){t.config.animation&&t._fixTransition();var 
e=t._hoverState;t._hoverState=null,o.default(t.element).trigger(t.constructor.Event.SHOWN),"out"===e&&t._leave(null,t)};if(o.default(this.tip).hasClass("fade")){var h=d.getTransitionDurationFromElement(this.tip);o.default(this.tip).one(d.TRANSITION_END,c).emulateTransitionEnd(h)}else c()}},e.hide=function(t){var e=this,n=this.getTipElement(),i=o.default.Event(this.constructor.Event.HIDE),r=function(){"show"!==e._hoverState&&n.parentNode&&n.parentNode.removeChild(n),e._cleanTipClass(),e.element.removeAttribute("aria-describedby"),o.default(e.element).trigger(e.constructor.Event.HIDDEN),null!==e._popper&&e._popper.destroy(),t&&t()};if(o.default(this.element).trigger(i),!i.isDefaultPrevented()){if(o.default(n).removeClass("show"),"ontouchstart"in document.documentElement&&o.default(document.body).children().off("mouseover",null,o.default.noop),this._activeTrigger.click=!1,this._activeTrigger.focus=!1,this._activeTrigger.hover=!1,o.default(this.tip).hasClass("fade")){var a=d.getTransitionDurationFromElement(n);o.default(n).one(d.TRANSITION_END,r).emulateTransitionEnd(a)}else r();this._hoverState=""}},e.update=function(){null!==this._popper&&this._popper.scheduleUpdate()},e.isWithContent=function(){return Boolean(this.getTitle())},e.addAttachmentClass=function(t){o.default(this.getTipElement()).addClass("bs-tooltip-"+t)},e.getTipElement=function(){return this.tip=this.tip||o.default(this.config.template)[0],this.tip},e.setContent=function(){var t=this.getTipElement();this.setElementContent(o.default(t.querySelectorAll(".tooltip-inner")),this.getTitle()),o.default(t).removeClass("fade show")},e.setElementContent=function(t,e){"object"!=typeof e||!e.nodeType&&!e.jquery?this.config.html?(this.config.sanitize&&(e=Q(e,this.config.whiteList,this.config.sanitizeFn)),t.html(e)):t.text(e):this.config.html?o.default(e).parent().is(t)||t.empty().append(e):t.text(o.default(e).text())},e.getTitle=function(){var t=this.element.getAttribute("data-original-title");return 
t||(t="function"==typeof this.config.title?this.config.title.call(this.element):this.config.title),t},e._getPopperConfig=function(t){var e=this;return l({},{placement:t,modifiers:{offset:this._getOffset(),flip:{behavior:this.config.fallbackPlacement},arrow:{element:".arrow"},preventOverflow:{boundariesElement:this.config.boundary}},onCreate:function(t){t.originalPlacement!==t.placement&&e._handlePopperPlacementChange(t)},onUpdate:function(t){return e._handlePopperPlacementChange(t)}},this.config.popperConfig)},e._getOffset=function(){var t=this,e={};return"function"==typeof this.config.offset?e.fn=function(e){return e.offsets=l({},e.offsets,t.config.offset(e.offsets,t.element)),e}:e.offset=this.config.offset,e},e._getContainer=function(){return!1===this.config.container?document.body:d.isElement(this.config.container)?o.default(this.config.container):o.default(document).find(this.config.container)},e._getAttachment=function(t){return z[t.toUpperCase()]},e._setListeners=function(){var t=this;this.config.trigger.split(" ").forEach((function(e){if("click"===e)o.default(t.element).on(t.constructor.Event.CLICK,t.config.selector,(function(e){return t.toggle(e)}));else if("manual"!==e){var n="hover"===e?t.constructor.Event.MOUSEENTER:t.constructor.Event.FOCUSIN,i="hover"===e?t.constructor.Event.MOUSELEAVE:t.constructor.Event.FOCUSOUT;o.default(t.element).on(n,t.config.selector,(function(e){return t._enter(e)})).on(i,t.config.selector,(function(e){return t._leave(e)}))}})),this._hideModalHandler=function(){t.element&&t.hide()},o.default(this.element).closest(".modal").on("hide.bs.modal",this._hideModalHandler),this.config.selector?this.config=l({},this.config,{trigger:"manual",selector:""}):this._fixTitle()},e._fixTitle=function(){var t=typeof 
this.element.getAttribute("data-original-title");(this.element.getAttribute("title")||"string"!==t)&&(this.element.setAttribute("data-original-title",this.element.getAttribute("title")||""),this.element.setAttribute("title",""))},e._enter=function(t,e){var n=this.constructor.DATA_KEY;(e=e||o.default(t.currentTarget).data(n))||(e=new this.constructor(t.currentTarget,this._getDelegateConfig()),o.default(t.currentTarget).data(n,e)),t&&(e._activeTrigger["focusin"===t.type?"focus":"hover"]=!0),o.default(e.getTipElement()).hasClass("show")||"show"===e._hoverState?e._hoverState="show":(clearTimeout(e._timeout),e._hoverState="show",e.config.delay&&e.config.delay.show?e._timeout=setTimeout((function(){"show"===e._hoverState&&e.show()}),e.config.delay.show):e.show())},e._leave=function(t,e){var n=this.constructor.DATA_KEY;(e=e||o.default(t.currentTarget).data(n))||(e=new this.constructor(t.currentTarget,this._getDelegateConfig()),o.default(t.currentTarget).data(n,e)),t&&(e._activeTrigger["focusout"===t.type?"focus":"hover"]=!1),e._isWithActiveTrigger()||(clearTimeout(e._timeout),e._hoverState="out",e.config.delay&&e.config.delay.hide?e._timeout=setTimeout((function(){"out"===e._hoverState&&e.hide()}),e.config.delay.hide):e.hide())},e._isWithActiveTrigger=function(){for(var t in this._activeTrigger)if(this._activeTrigger[t])return!0;return!1},e._getConfig=function(t){var e=o.default(this.element).data();return Object.keys(e).forEach((function(t){-1!==Y.indexOf(t)&&delete e[t]})),"number"==typeof(t=l({},this.constructor.Default,e,"object"==typeof t&&t?t:{})).delay&&(t.delay={show:t.delay,hide:t.delay}),"number"==typeof t.title&&(t.title=t.title.toString()),"number"==typeof t.content&&(t.content=t.content.toString()),d.typeCheckConfig(W,t,this.constructor.DefaultType),t.sanitize&&(t.template=Q(t.template,t.whiteList,t.sanitizeFn)),t},e._getDelegateConfig=function(){var t={};if(this.config)for(var e in 
this.config)this.constructor.Default[e]!==this.config[e]&&(t[e]=this.config[e]);return t},e._cleanTipClass=function(){var t=o.default(this.getTipElement()),e=t.attr("class").match(V);null!==e&&e.length&&t.removeClass(e.join(""))},e._handlePopperPlacementChange=function(t){this.tip=t.instance.popper,this._cleanTipClass(),this.addAttachmentClass(this._getAttachment(t.placement))},e._fixTransition=function(){var t=this.getTipElement(),e=this.config.animation;null===t.getAttribute("x-placement")&&(o.default(t).removeClass("fade"),this.config.animation=!1,this.hide(),this.show(),this.config.animation=e)},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this),i=n.data("bs.tooltip"),r="object"==typeof e&&e;if((i||!/dispose|hide/.test(e))&&(i||(i=new t(this,r),n.data("bs.tooltip",i)),"string"==typeof e)){if(void 0===i[e])throw new TypeError('No method named "'+e+'"');i[e]()}}))},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}},{key:"Default",get:function(){return K}},{key:"NAME",get:function(){return W}},{key:"DATA_KEY",get:function(){return"bs.tooltip"}},{key:"Event",get:function(){return $}},{key:"EVENT_KEY",get:function(){return".bs.tooltip"}},{key:"DefaultType",get:function(){return X}}]),t}();o.default.fn[W]=G._jQueryInterface,o.default.fn[W].Constructor=G,o.default.fn[W].noConflict=function(){return o.default.fn[W]=U,G._jQueryInterface};var J="popover",Z=o.default.fn[J],tt=new RegExp("(^|\\s)bs-popover\\S+","g"),et=l({},G.Default,{placement:"right",trigger:"click",content:"",template:''}),nt=l({},G.DefaultType,{content:"(string|element|function)"}),it={HIDE:"hide.bs.popover",HIDDEN:"hidden.bs.popover",SHOW:"show.bs.popover",SHOWN:"shown.bs.popover",INSERTED:"inserted.bs.popover",CLICK:"click.bs.popover",FOCUSIN:"focusin.bs.popover",FOCUSOUT:"focusout.bs.popover",MOUSEENTER:"mouseenter.bs.popover",MOUSELEAVE:"mouseleave.bs.popover"},ot=function(t){var e,n;function i(){return 
t.apply(this,arguments)||this}n=t,(e=i).prototype=Object.create(n.prototype),e.prototype.constructor=e,u(e,n);var r=i.prototype;return r.isWithContent=function(){return this.getTitle()||this._getContent()},r.addAttachmentClass=function(t){o.default(this.getTipElement()).addClass("bs-popover-"+t)},r.getTipElement=function(){return this.tip=this.tip||o.default(this.config.template)[0],this.tip},r.setContent=function(){var t=o.default(this.getTipElement());this.setElementContent(t.find(".popover-header"),this.getTitle());var e=this._getContent();"function"==typeof e&&(e=e.call(this.element)),this.setElementContent(t.find(".popover-body"),e),t.removeClass("fade show")},r._getContent=function(){return this.element.getAttribute("data-content")||this.config.content},r._cleanTipClass=function(){var t=o.default(this.getTipElement()),e=t.attr("class").match(tt);null!==e&&e.length>0&&t.removeClass(e.join(""))},i._jQueryInterface=function(t){return this.each((function(){var e=o.default(this).data("bs.popover"),n="object"==typeof t?t:null;if((e||!/dispose|hide/.test(t))&&(e||(e=new i(this,n),o.default(this).data("bs.popover",e)),"string"==typeof t)){if(void 0===e[t])throw new TypeError('No method named "'+t+'"');e[t]()}}))},s(i,null,[{key:"VERSION",get:function(){return"4.6.1"}},{key:"Default",get:function(){return et}},{key:"NAME",get:function(){return J}},{key:"DATA_KEY",get:function(){return"bs.popover"}},{key:"Event",get:function(){return it}},{key:"EVENT_KEY",get:function(){return".bs.popover"}},{key:"DefaultType",get:function(){return nt}}]),i}(G);o.default.fn[J]=ot._jQueryInterface,o.default.fn[J].Constructor=ot,o.default.fn[J].noConflict=function(){return o.default.fn[J]=Z,ot._jQueryInterface};var rt="scrollspy",at=o.default.fn[rt],st={offset:10,method:"auto",target:""},lt={offset:"number",method:"string",target:"(string|element)"},ut=function(){function t(t,e){var 
n=this;this._element=t,this._scrollElement="BODY"===t.tagName?window:t,this._config=this._getConfig(e),this._selector=this._config.target+" .nav-link,"+this._config.target+" .list-group-item,"+this._config.target+" .dropdown-item",this._offsets=[],this._targets=[],this._activeTarget=null,this._scrollHeight=0,o.default(this._scrollElement).on("scroll.bs.scrollspy",(function(t){return n._process(t)})),this.refresh(),this._process()}var e=t.prototype;return e.refresh=function(){var t=this,e=this._scrollElement===this._scrollElement.window?"offset":"position",n="auto"===this._config.method?e:this._config.method,i="position"===n?this._getScrollTop():0;this._offsets=[],this._targets=[],this._scrollHeight=this._getScrollHeight(),[].slice.call(document.querySelectorAll(this._selector)).map((function(t){var e,r=d.getSelectorFromElement(t);if(r&&(e=document.querySelector(r)),e){var a=e.getBoundingClientRect();if(a.width||a.height)return[o.default(e)[n]().top+i,r]}return null})).filter((function(t){return t})).sort((function(t,e){return t[0]-e[0]})).forEach((function(e){t._offsets.push(e[0]),t._targets.push(e[1])}))},e.dispose=function(){o.default.removeData(this._element,"bs.scrollspy"),o.default(this._scrollElement).off(".bs.scrollspy"),this._element=null,this._scrollElement=null,this._config=null,this._selector=null,this._offsets=null,this._targets=null,this._activeTarget=null,this._scrollHeight=null},e._getConfig=function(t){if("string"!=typeof(t=l({},st,"object"==typeof t&&t?t:{})).target&&d.isElement(t.target)){var e=o.default(t.target).attr("id");e||(e=d.getUID(rt),o.default(t.target).attr("id",e)),t.target="#"+e}return d.typeCheckConfig(rt,t,lt),t},e._getScrollTop=function(){return this._scrollElement===window?this._scrollElement.pageYOffset:this._scrollElement.scrollTop},e._getScrollHeight=function(){return this._scrollElement.scrollHeight||Math.max(document.body.scrollHeight,document.documentElement.scrollHeight)},e._getOffsetHeight=function(){return 
this._scrollElement===window?window.innerHeight:this._scrollElement.getBoundingClientRect().height},e._process=function(){var t=this._getScrollTop()+this._config.offset,e=this._getScrollHeight(),n=this._config.offset+e-this._getOffsetHeight();if(this._scrollHeight!==e&&this.refresh(),t>=n){var i=this._targets[this._targets.length-1];this._activeTarget!==i&&this._activate(i)}else{if(this._activeTarget&&t0)return this._activeTarget=null,void this._clear();for(var o=this._offsets.length;o--;)this._activeTarget!==this._targets[o]&&t>=this._offsets[o]&&(void 0===this._offsets[o+1]||t li > .active":".active";n=(n=o.default.makeArray(o.default(i).find(a)))[n.length-1]}var s=o.default.Event("hide.bs.tab",{relatedTarget:this._element}),l=o.default.Event("show.bs.tab",{relatedTarget:n});if(n&&o.default(n).trigger(s),o.default(this._element).trigger(l),!l.isDefaultPrevented()&&!s.isDefaultPrevented()){r&&(e=document.querySelector(r)),this._activate(this._element,i);var u=function(){var e=o.default.Event("hidden.bs.tab",{relatedTarget:t._element}),i=o.default.Event("shown.bs.tab",{relatedTarget:n});o.default(n).trigger(e),o.default(t._element).trigger(i)};e?this._activate(e,e.parentNode,u):u()}}},e.dispose=function(){o.default.removeData(this._element,"bs.tab"),this._element=null},e._activate=function(t,e,n){var i=this,r=(!e||"UL"!==e.nodeName&&"OL"!==e.nodeName?o.default(e).children(".active"):o.default(e).find("> li > .active"))[0],a=n&&r&&o.default(r).hasClass("fade"),s=function(){return i._transitionComplete(t,r,n)};if(r&&a){var l=d.getTransitionDurationFromElement(r);o.default(r).removeClass("show").one(d.TRANSITION_END,s).emulateTransitionEnd(l)}else s()},e._transitionComplete=function(t,e,n){if(e){o.default(e).removeClass("active");var i=o.default(e.parentNode).find("> .dropdown-menu 
.active")[0];i&&o.default(i).removeClass("active"),"tab"===e.getAttribute("role")&&e.setAttribute("aria-selected",!1)}o.default(t).addClass("active"),"tab"===t.getAttribute("role")&&t.setAttribute("aria-selected",!0),d.reflow(t),t.classList.contains("fade")&&t.classList.add("show");var r=t.parentNode;if(r&&"LI"===r.nodeName&&(r=r.parentNode),r&&o.default(r).hasClass("dropdown-menu")){var a=o.default(t).closest(".dropdown")[0];if(a){var s=[].slice.call(a.querySelectorAll(".dropdown-toggle"));o.default(s).addClass("active")}t.setAttribute("aria-expanded",!0)}n&&n()},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this),i=n.data("bs.tab");if(i||(i=new t(this),n.data("bs.tab",i)),"string"==typeof e){if(void 0===i[e])throw new TypeError('No method named "'+e+'"');i[e]()}}))},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}}]),t}();o.default(document).on("click.bs.tab.data-api",'[data-toggle="tab"], [data-toggle="pill"], [data-toggle="list"]',(function(t){t.preventDefault(),dt._jQueryInterface.call(o.default(this),"show")})),o.default.fn.tab=dt._jQueryInterface,o.default.fn.tab.Constructor=dt,o.default.fn.tab.noConflict=function(){return o.default.fn.tab=ft,dt._jQueryInterface};var ct="toast",ht=o.default.fn[ct],pt={animation:!0,autohide:!0,delay:500},mt={animation:"boolean",autohide:"boolean",delay:"number"},gt=function(){function t(t,e){this._element=t,this._config=this._getConfig(e),this._timeout=null,this._setListeners()}var e=t.prototype;return e.show=function(){var t=this,e=o.default.Event("show.bs.toast");if(o.default(this._element).trigger(e),!e.isDefaultPrevented()){this._clearTimeout(),this._config.animation&&this._element.classList.add("fade");var 
n=function(){t._element.classList.remove("showing"),t._element.classList.add("show"),o.default(t._element).trigger("shown.bs.toast"),t._config.autohide&&(t._timeout=setTimeout((function(){t.hide()}),t._config.delay))};if(this._element.classList.remove("hide"),d.reflow(this._element),this._element.classList.add("showing"),this._config.animation){var i=d.getTransitionDurationFromElement(this._element);o.default(this._element).one(d.TRANSITION_END,n).emulateTransitionEnd(i)}else n()}},e.hide=function(){if(this._element.classList.contains("show")){var t=o.default.Event("hide.bs.toast");o.default(this._element).trigger(t),t.isDefaultPrevented()||this._close()}},e.dispose=function(){this._clearTimeout(),this._element.classList.contains("show")&&this._element.classList.remove("show"),o.default(this._element).off("click.dismiss.bs.toast"),o.default.removeData(this._element,"bs.toast"),this._element=null,this._config=null},e._getConfig=function(t){return t=l({},pt,o.default(this._element).data(),"object"==typeof t&&t?t:{}),d.typeCheckConfig(ct,t,this.constructor.DefaultType),t},e._setListeners=function(){var t=this;o.default(this._element).on("click.dismiss.bs.toast",'[data-dismiss="toast"]',(function(){return t.hide()}))},e._close=function(){var t=this,e=function(){t._element.classList.add("hide"),o.default(t._element).trigger("hidden.bs.toast")};if(this._element.classList.remove("show"),this._config.animation){var n=d.getTransitionDurationFromElement(this._element);o.default(this._element).one(d.TRANSITION_END,e).emulateTransitionEnd(n)}else e()},e._clearTimeout=function(){clearTimeout(this._timeout),this._timeout=null},t._jQueryInterface=function(e){return this.each((function(){var n=o.default(this),i=n.data("bs.toast");if(i||(i=new t(this,"object"==typeof e&&e),n.data("bs.toast",i)),"string"==typeof e){if(void 0===i[e])throw new TypeError('No method named 
"'+e+'"');i[e](this)}}))},s(t,null,[{key:"VERSION",get:function(){return"4.6.1"}},{key:"DefaultType",get:function(){return mt}},{key:"Default",get:function(){return pt}}]),t}();o.default.fn[ct]=gt._jQueryInterface,o.default.fn[ct].Constructor=gt,o.default.fn[ct].noConflict=function(){return o.default.fn[ct]=ht,gt._jQueryInterface},t.Alert=h,t.Button=m,t.Carousel=E,t.Collapse=D,t.Dropdown=j,t.Modal=R,t.Popover=ot,t.Scrollspy=ut,t.Tab=dt,t.Toast=gt,t.Tooltip=G,t.Util=d,Object.defineProperty(t,"__esModule",{value:!0})}(e,n(0),n(1))},function(t,e){var n;n=function(){return this}();try{n=n||new Function("return this")()}catch(t){"object"==typeof window&&(n=window)}t.exports=n},function(t,e,n){"use strict";n.r(e);n(0),n(3),n.p;$((function(){var t=document.querySelector("div.bd-sidebar");let e=parseInt(sessionStorage.getItem("sidebar-scroll-top"),10);if(isNaN(e)){var n=document.getElementById("bd-docs-nav").querySelectorAll(".active");if(n.length>0){var i=n[n.length-1],o=i.getBoundingClientRect().y-t.getBoundingClientRect().y;if(i.getBoundingClientRect().y>.5*window.innerHeight){let e=.25;t.scrollTop=o-t.clientHeight*e,console.log("[PST]: Scrolled sidebar using last active link...")}}}else t.scrollTop=e,console.log("[PST]: Scrolled sidebar using stored browser position...");window.addEventListener("beforeunload",()=>{sessionStorage.setItem("sidebar-scroll-top",t.scrollTop)})})),$((function(){$(window).on("activate.bs.scrollspy",(function(){document.querySelectorAll("#bd-toc-nav a").forEach(t=>{t.parentElement.classList.remove("active")});document.querySelectorAll("#bd-toc-nav a.active").forEach(t=>{t.parentElement.classList.add("active")})}))}))}]); \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 000000000..2c774d17a --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,632 @@ +/* + * Sphinx JavaScript utilities for the full-text search. + */ +"use strict"; + +/** + * Simple result scoring code. 
+ */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename, kind] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +// Global search result kind enum, used by themes to style search results. 
+class SearchResultKind { + static get index() { return "index"; } + static get object() { return "object"; } + static get text() { return "text"; } + static get title() { return "title"; } +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename, kind] = item; + + let listItem = document.createElement("li"); + // Add a class representing the item's type: + // can be used by a theme's CSS selector for styling + // See SearchResultKind for the class names. 
+ listItem.classList.add(`kind-${kind}`); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." 
+ ); + else + Search.status.innerText = Documentation.ngettext( + "Search finished, found one page matching the search query.", + "Search finished, found ${resultCount} pages matching the search query.", + resultCount, + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename, kind]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. 
+ */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.setAttribute("role", "list"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + 
Search.startPulse(); + + // index already loaded, the browser was quick! + if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename, kind]. 
+ const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score + boost, + filenames[file], + SearchResultKind.title, + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + SearchResultKind.index, + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. 
+ normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + SearchResultKind.object, + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add 
support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. 
+ const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + SearchResultKind.text, + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 000000000..8a96c69a1 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + 
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/styles/pydata-sphinx-theme.css b/_static/styles/pydata-sphinx-theme.css new file mode 100644 index 000000000..c39ccf7a3 --- /dev/null +++ b/_static/styles/pydata-sphinx-theme.css @@ -0,0 +1,6 @@ +/*! + * Bootstrap v4.6.1 (https://getbootstrap.com/) + * Copyright 2011-2021 The Bootstrap Authors + * Copyright 2011-2021 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */:root{--blue:#007bff;--indigo:#6610f2;--purple:#6f42c1;--pink:#e83e8c;--red:#dc3545;--orange:#fd7e14;--yellow:#ffc107;--green:#28a745;--teal:#20c997;--cyan:#17a2b8;--white:#fff;--gray:#6c757d;--gray-dark:#343a40;--primary:#007bff;--secondary:#6c757d;--success:#28a745;--info:#17a2b8;--warning:#ffc107;--danger:#dc3545;--light:#f8f9fa;--dark:#343a40;--breakpoint-xs:0;--breakpoint-sm:540px;--breakpoint-md:720px;--breakpoint-lg:960px;--breakpoint-xl:1200px;--font-family-sans-serif:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-family-monospace:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace}*,:after,:before{box-sizing:border-box}html{font-family:sans-serif;line-height:1.15;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:rgba(0,0,0,0)}article,aside,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto 
Sans,Liberation Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-size:1rem;line-height:1.5;color:#212529;text-align:left}[tabindex="-1"]:focus:not(:focus-visible){outline:0!important}hr{box-sizing:content-box;height:0;overflow:visible}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem}p{margin-top:0;margin-bottom:1rem}abbr[data-original-title],abbr[title]{text-decoration:underline;text-decoration:underline dotted;cursor:help;border-bottom:0;text-decoration-skip-ink:none}address{font-style:normal;line-height:inherit}address,dl,ol,ul{margin-bottom:1rem}dl,ol,ul{margin-top:0}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}b,strong{font-weight:bolder}small{font-size:80%}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#007bff;background-color:transparent}a:hover{color:#0056b3}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:1em}pre{margin-top:0;margin-bottom:1rem;overflow:auto;-ms-overflow-style:scrollbar}figure{margin:0 0 
1rem}img{border-style:none}img,svg{vertical-align:middle}svg{overflow:hidden}table{border-collapse:collapse}caption{padding-top:.75rem;padding-bottom:.75rem;color:#6c757d;text-align:left;caption-side:bottom}th{text-align:inherit;text-align:-webkit-match-parent}label{display:inline-block;margin-bottom:.5rem}button{border-radius:0}button:focus:not(:focus-visible){outline:0}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,input{overflow:visible}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{padding:0;border-style:none}input[type=checkbox],input[type=radio]{box-sizing:border-box;padding:0}textarea{overflow:auto;resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{display:block;width:100%;max-width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit;color:inherit;white-space:normal}progress{vertical-align:baseline}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:none}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}summary{display:list-item;cursor:pointer}template{display:none}[hidden]{display:none!important}.h1,.h2,.h3,.h4,.h5,.h6,h1,h2,h3,h4,h5,h6{margin-bottom:.5rem;font-weight:500;line-height:1.2}.h1,h1{font-size:2.5rem}.h2,h2{font-size:2rem}.h3,h3{font-size:1.75rem}.h4,h4{font-size:1.5rem}.h5,h5{font-size:1.25rem}.h6,h6{font-size:1rem}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:6rem}.display-1
,.display-2{font-weight:300;line-height:1.2}.display-2{font-size:5.5rem}.display-3{font-size:4.5rem}.display-3,.display-4{font-weight:300;line-height:1.2}.display-4{font-size:3.5rem}hr{margin-top:1rem;margin-bottom:1rem;border-top:1px solid rgba(0,0,0,.1)}.small,small{font-size:80%;font-weight:400}.mark,mark{padding:.2em;background-color:#fcf8e3}.list-inline,.list-unstyled{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:.5rem}.initialism{font-size:90%;text-transform:uppercase}.blockquote{margin-bottom:1rem;font-size:1.25rem}.blockquote-footer{display:block;font-size:80%;color:#6c757d}.blockquote-footer:before{content:"\2014\00A0"}.img-fluid,.img-thumbnail{max-width:100%;height:auto}.img-thumbnail{padding:.25rem;background-color:#fff;border:1px solid #dee2e6;border-radius:.25rem}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:90%;color:#6c757d}code{font-size:87.5%;color:#e83e8c;word-wrap:break-word}a>code{color:inherit}kbd{padding:.2rem .4rem;font-size:87.5%;color:#fff;background-color:#212529;border-radius:.2rem}kbd kbd{padding:0;font-size:100%;font-weight:700}pre{display:block;font-size:87.5%;color:#212529}pre code{font-size:inherit;color:inherit;word-break:normal}.pre-scrollable{max-height:340px;overflow-y:scroll}.container,.container-fluid,.container-lg,.container-md,.container-sm,.container-xl{width:100%;padding-right:15px;padding-left:15px;margin-right:auto;margin-left:auto}@media (min-width:540px){.container,.container-sm{max-width:540px}}@media (min-width:720px){.container,.container-md,.container-sm{max-width:720px}}@media (min-width:960px){.container,.container-lg,.container-md,.container-sm{max-width:960px}}@media 
(min-width:1200px){.container,.container-lg,.container-md,.container-sm,.container-xl{max-width:1400px}}.row{display:flex;flex-wrap:wrap;margin-right:-15px;margin-left:-15px}.no-gutters{margin-right:0;margin-left:0}.no-gutters>.col,.no-gutters>[class*=col-]{padding-right:0;padding-left:0}.col,.col-1,.col-2,.col-3,.col-4,.col-5,.col-6,.col-7,.col-8,.col-9,.col-10,.col-11,.col-12,.col-auto,.col-lg,.col-lg-1,.col-lg-2,.col-lg-3,.col-lg-4,.col-lg-5,.col-lg-6,.col-lg-7,.col-lg-8,.col-lg-9,.col-lg-10,.col-lg-11,.col-lg-12,.col-lg-auto,.col-md,.col-md-1,.col-md-2,.col-md-3,.col-md-4,.col-md-5,.col-md-6,.col-md-7,.col-md-8,.col-md-9,.col-md-10,.col-md-11,.col-md-12,.col-md-auto,.col-sm,.col-sm-1,.col-sm-2,.col-sm-3,.col-sm-4,.col-sm-5,.col-sm-6,.col-sm-7,.col-sm-8,.col-sm-9,.col-sm-10,.col-sm-11,.col-sm-12,.col-sm-auto,.col-xl,.col-xl-1,.col-xl-2,.col-xl-3,.col-xl-4,.col-xl-5,.col-xl-6,.col-xl-7,.col-xl-8,.col-xl-9,.col-xl-10,.col-xl-11,.col-xl-12,.col-xl-auto{position:relative;width:100%;padding-right:15px;padding-left:15px}.col{flex-basis:0;flex-grow:1;max-width:100%}.row-cols-1>*{flex:0 0 100%;max-width:100%}.row-cols-2>*{flex:0 0 50%;max-width:50%}.row-cols-3>*{flex:0 0 33.33333%;max-width:33.33333%}.row-cols-4>*{flex:0 0 25%;max-width:25%}.row-cols-5>*{flex:0 0 20%;max-width:20%}.row-cols-6>*{flex:0 0 16.66667%;max-width:16.66667%}.col-auto{flex:0 0 auto;width:auto;max-width:100%}.col-1{flex:0 0 8.33333%;max-width:8.33333%}.col-2{flex:0 0 16.66667%;max-width:16.66667%}.col-3{flex:0 0 25%;max-width:25%}.col-4{flex:0 0 33.33333%;max-width:33.33333%}.col-5{flex:0 0 41.66667%;max-width:41.66667%}.col-6{flex:0 0 50%;max-width:50%}.col-7{flex:0 0 58.33333%;max-width:58.33333%}.col-8{flex:0 0 66.66667%;max-width:66.66667%}.col-9{flex:0 0 75%;max-width:75%}.col-10{flex:0 0 83.33333%;max-width:83.33333%}.col-11{flex:0 0 91.66667%;max-width:91.66667%}.col-12{flex:0 0 
100%;max-width:100%}.order-first{order:-1}.order-last{order:13}.order-0{order:0}.order-1{order:1}.order-2{order:2}.order-3{order:3}.order-4{order:4}.order-5{order:5}.order-6{order:6}.order-7{order:7}.order-8{order:8}.order-9{order:9}.order-10{order:10}.order-11{order:11}.order-12{order:12}.offset-1{margin-left:8.33333%}.offset-2{margin-left:16.66667%}.offset-3{margin-left:25%}.offset-4{margin-left:33.33333%}.offset-5{margin-left:41.66667%}.offset-6{margin-left:50%}.offset-7{margin-left:58.33333%}.offset-8{margin-left:66.66667%}.offset-9{margin-left:75%}.offset-10{margin-left:83.33333%}.offset-11{margin-left:91.66667%}@media (min-width:540px){.col-sm{flex-basis:0;flex-grow:1;max-width:100%}.row-cols-sm-1>*{flex:0 0 100%;max-width:100%}.row-cols-sm-2>*{flex:0 0 50%;max-width:50%}.row-cols-sm-3>*{flex:0 0 33.33333%;max-width:33.33333%}.row-cols-sm-4>*{flex:0 0 25%;max-width:25%}.row-cols-sm-5>*{flex:0 0 20%;max-width:20%}.row-cols-sm-6>*{flex:0 0 16.66667%;max-width:16.66667%}.col-sm-auto{flex:0 0 auto;width:auto;max-width:100%}.col-sm-1{flex:0 0 8.33333%;max-width:8.33333%}.col-sm-2{flex:0 0 16.66667%;max-width:16.66667%}.col-sm-3{flex:0 0 25%;max-width:25%}.col-sm-4{flex:0 0 33.33333%;max-width:33.33333%}.col-sm-5{flex:0 0 41.66667%;max-width:41.66667%}.col-sm-6{flex:0 0 50%;max-width:50%}.col-sm-7{flex:0 0 58.33333%;max-width:58.33333%}.col-sm-8{flex:0 0 66.66667%;max-width:66.66667%}.col-sm-9{flex:0 0 75%;max-width:75%}.col-sm-10{flex:0 0 83.33333%;max-width:83.33333%}.col-sm-11{flex:0 0 91.66667%;max-width:91.66667%}.col-sm-12{flex:0 0 
100%;max-width:100%}.order-sm-first{order:-1}.order-sm-last{order:13}.order-sm-0{order:0}.order-sm-1{order:1}.order-sm-2{order:2}.order-sm-3{order:3}.order-sm-4{order:4}.order-sm-5{order:5}.order-sm-6{order:6}.order-sm-7{order:7}.order-sm-8{order:8}.order-sm-9{order:9}.order-sm-10{order:10}.order-sm-11{order:11}.order-sm-12{order:12}.offset-sm-0{margin-left:0}.offset-sm-1{margin-left:8.33333%}.offset-sm-2{margin-left:16.66667%}.offset-sm-3{margin-left:25%}.offset-sm-4{margin-left:33.33333%}.offset-sm-5{margin-left:41.66667%}.offset-sm-6{margin-left:50%}.offset-sm-7{margin-left:58.33333%}.offset-sm-8{margin-left:66.66667%}.offset-sm-9{margin-left:75%}.offset-sm-10{margin-left:83.33333%}.offset-sm-11{margin-left:91.66667%}}@media (min-width:720px){.col-md{flex-basis:0;flex-grow:1;max-width:100%}.row-cols-md-1>*{flex:0 0 100%;max-width:100%}.row-cols-md-2>*{flex:0 0 50%;max-width:50%}.row-cols-md-3>*{flex:0 0 33.33333%;max-width:33.33333%}.row-cols-md-4>*{flex:0 0 25%;max-width:25%}.row-cols-md-5>*{flex:0 0 20%;max-width:20%}.row-cols-md-6>*{flex:0 0 16.66667%;max-width:16.66667%}.col-md-auto{flex:0 0 auto;width:auto;max-width:100%}.col-md-1{flex:0 0 8.33333%;max-width:8.33333%}.col-md-2{flex:0 0 16.66667%;max-width:16.66667%}.col-md-3{flex:0 0 25%;max-width:25%}.col-md-4{flex:0 0 33.33333%;max-width:33.33333%}.col-md-5{flex:0 0 41.66667%;max-width:41.66667%}.col-md-6{flex:0 0 50%;max-width:50%}.col-md-7{flex:0 0 58.33333%;max-width:58.33333%}.col-md-8{flex:0 0 66.66667%;max-width:66.66667%}.col-md-9{flex:0 0 75%;max-width:75%}.col-md-10{flex:0 0 83.33333%;max-width:83.33333%}.col-md-11{flex:0 0 91.66667%;max-width:91.66667%}.col-md-12{flex:0 0 
100%;max-width:100%}.order-md-first{order:-1}.order-md-last{order:13}.order-md-0{order:0}.order-md-1{order:1}.order-md-2{order:2}.order-md-3{order:3}.order-md-4{order:4}.order-md-5{order:5}.order-md-6{order:6}.order-md-7{order:7}.order-md-8{order:8}.order-md-9{order:9}.order-md-10{order:10}.order-md-11{order:11}.order-md-12{order:12}.offset-md-0{margin-left:0}.offset-md-1{margin-left:8.33333%}.offset-md-2{margin-left:16.66667%}.offset-md-3{margin-left:25%}.offset-md-4{margin-left:33.33333%}.offset-md-5{margin-left:41.66667%}.offset-md-6{margin-left:50%}.offset-md-7{margin-left:58.33333%}.offset-md-8{margin-left:66.66667%}.offset-md-9{margin-left:75%}.offset-md-10{margin-left:83.33333%}.offset-md-11{margin-left:91.66667%}}@media (min-width:960px){.col-lg{flex-basis:0;flex-grow:1;max-width:100%}.row-cols-lg-1>*{flex:0 0 100%;max-width:100%}.row-cols-lg-2>*{flex:0 0 50%;max-width:50%}.row-cols-lg-3>*{flex:0 0 33.33333%;max-width:33.33333%}.row-cols-lg-4>*{flex:0 0 25%;max-width:25%}.row-cols-lg-5>*{flex:0 0 20%;max-width:20%}.row-cols-lg-6>*{flex:0 0 16.66667%;max-width:16.66667%}.col-lg-auto{flex:0 0 auto;width:auto;max-width:100%}.col-lg-1{flex:0 0 8.33333%;max-width:8.33333%}.col-lg-2{flex:0 0 16.66667%;max-width:16.66667%}.col-lg-3{flex:0 0 25%;max-width:25%}.col-lg-4{flex:0 0 33.33333%;max-width:33.33333%}.col-lg-5{flex:0 0 41.66667%;max-width:41.66667%}.col-lg-6{flex:0 0 50%;max-width:50%}.col-lg-7{flex:0 0 58.33333%;max-width:58.33333%}.col-lg-8{flex:0 0 66.66667%;max-width:66.66667%}.col-lg-9{flex:0 0 75%;max-width:75%}.col-lg-10{flex:0 0 83.33333%;max-width:83.33333%}.col-lg-11{flex:0 0 91.66667%;max-width:91.66667%}.col-lg-12{flex:0 0 
100%;max-width:100%}.order-lg-first{order:-1}.order-lg-last{order:13}.order-lg-0{order:0}.order-lg-1{order:1}.order-lg-2{order:2}.order-lg-3{order:3}.order-lg-4{order:4}.order-lg-5{order:5}.order-lg-6{order:6}.order-lg-7{order:7}.order-lg-8{order:8}.order-lg-9{order:9}.order-lg-10{order:10}.order-lg-11{order:11}.order-lg-12{order:12}.offset-lg-0{margin-left:0}.offset-lg-1{margin-left:8.33333%}.offset-lg-2{margin-left:16.66667%}.offset-lg-3{margin-left:25%}.offset-lg-4{margin-left:33.33333%}.offset-lg-5{margin-left:41.66667%}.offset-lg-6{margin-left:50%}.offset-lg-7{margin-left:58.33333%}.offset-lg-8{margin-left:66.66667%}.offset-lg-9{margin-left:75%}.offset-lg-10{margin-left:83.33333%}.offset-lg-11{margin-left:91.66667%}}@media (min-width:1200px){.col-xl{flex-basis:0;flex-grow:1;max-width:100%}.row-cols-xl-1>*{flex:0 0 100%;max-width:100%}.row-cols-xl-2>*{flex:0 0 50%;max-width:50%}.row-cols-xl-3>*{flex:0 0 33.33333%;max-width:33.33333%}.row-cols-xl-4>*{flex:0 0 25%;max-width:25%}.row-cols-xl-5>*{flex:0 0 20%;max-width:20%}.row-cols-xl-6>*{flex:0 0 16.66667%;max-width:16.66667%}.col-xl-auto{flex:0 0 auto;width:auto;max-width:100%}.col-xl-1{flex:0 0 8.33333%;max-width:8.33333%}.col-xl-2{flex:0 0 16.66667%;max-width:16.66667%}.col-xl-3{flex:0 0 25%;max-width:25%}.col-xl-4{flex:0 0 33.33333%;max-width:33.33333%}.col-xl-5{flex:0 0 41.66667%;max-width:41.66667%}.col-xl-6{flex:0 0 50%;max-width:50%}.col-xl-7{flex:0 0 58.33333%;max-width:58.33333%}.col-xl-8{flex:0 0 66.66667%;max-width:66.66667%}.col-xl-9{flex:0 0 75%;max-width:75%}.col-xl-10{flex:0 0 83.33333%;max-width:83.33333%}.col-xl-11{flex:0 0 91.66667%;max-width:91.66667%}.col-xl-12{flex:0 0 
100%;max-width:100%}.order-xl-first{order:-1}.order-xl-last{order:13}.order-xl-0{order:0}.order-xl-1{order:1}.order-xl-2{order:2}.order-xl-3{order:3}.order-xl-4{order:4}.order-xl-5{order:5}.order-xl-6{order:6}.order-xl-7{order:7}.order-xl-8{order:8}.order-xl-9{order:9}.order-xl-10{order:10}.order-xl-11{order:11}.order-xl-12{order:12}.offset-xl-0{margin-left:0}.offset-xl-1{margin-left:8.33333%}.offset-xl-2{margin-left:16.66667%}.offset-xl-3{margin-left:25%}.offset-xl-4{margin-left:33.33333%}.offset-xl-5{margin-left:41.66667%}.offset-xl-6{margin-left:50%}.offset-xl-7{margin-left:58.33333%}.offset-xl-8{margin-left:66.66667%}.offset-xl-9{margin-left:75%}.offset-xl-10{margin-left:83.33333%}.offset-xl-11{margin-left:91.66667%}}.table{width:100%;margin-bottom:1rem;color:#212529}.table td,.table th{padding:.75rem;vertical-align:top;border-top:1px solid #dee2e6}.table thead th{vertical-align:bottom;border-bottom:2px solid #dee2e6}.table tbody+tbody{border-top:2px solid #dee2e6}.table-sm td,.table-sm th{padding:.3rem}.table-bordered,.table-bordered td,.table-bordered th{border:1px solid #dee2e6}.table-bordered thead td,.table-bordered thead th{border-bottom-width:2px}.table-borderless tbody+tbody,.table-borderless td,.table-borderless th,.table-borderless thead th{border:0}.table-striped tbody tr:nth-of-type(odd){background-color:rgba(0,0,0,.05)}.table-hover tbody tr:hover{color:#212529;background-color:rgba(0,0,0,.075)}.table-primary,.table-primary>td,.table-primary>th{background-color:#b8daff}.table-primary tbody+tbody,.table-primary td,.table-primary th,.table-primary thead th{border-color:#7abaff}.table-hover .table-primary:hover,.table-hover .table-primary:hover>td,.table-hover .table-primary:hover>th{background-color:#9fcdff}.table-secondary,.table-secondary>td,.table-secondary>th{background-color:#d6d8db}.table-secondary tbody+tbody,.table-secondary td,.table-secondary th,.table-secondary thead th{border-color:#b3b7bb}.table-hover .table-secondary:hover,.table-hover 
.table-secondary:hover>td,.table-hover .table-secondary:hover>th{background-color:#c8cbcf}.table-success,.table-success>td,.table-success>th{background-color:#c3e6cb}.table-success tbody+tbody,.table-success td,.table-success th,.table-success thead th{border-color:#8fd19e}.table-hover .table-success:hover,.table-hover .table-success:hover>td,.table-hover .table-success:hover>th{background-color:#b1dfbb}.table-info,.table-info>td,.table-info>th{background-color:#bee5eb}.table-info tbody+tbody,.table-info td,.table-info th,.table-info thead th{border-color:#86cfda}.table-hover .table-info:hover,.table-hover .table-info:hover>td,.table-hover .table-info:hover>th{background-color:#abdde5}.table-warning,.table-warning>td,.table-warning>th{background-color:#ffeeba}.table-warning tbody+tbody,.table-warning td,.table-warning th,.table-warning thead th{border-color:#ffdf7e}.table-hover .table-warning:hover,.table-hover .table-warning:hover>td,.table-hover .table-warning:hover>th{background-color:#ffe8a1}.table-danger,.table-danger>td,.table-danger>th{background-color:#f5c6cb}.table-danger tbody+tbody,.table-danger td,.table-danger th,.table-danger thead th{border-color:#ed969e}.table-hover .table-danger:hover,.table-hover .table-danger:hover>td,.table-hover .table-danger:hover>th{background-color:#f1b0b7}.table-light,.table-light>td,.table-light>th{background-color:#fdfdfe}.table-light tbody+tbody,.table-light td,.table-light th,.table-light thead th{border-color:#fbfcfc}.table-hover .table-light:hover,.table-hover .table-light:hover>td,.table-hover .table-light:hover>th{background-color:#ececf6}.table-dark,.table-dark>td,.table-dark>th{background-color:#c6c8ca}.table-dark tbody+tbody,.table-dark td,.table-dark th,.table-dark thead th{border-color:#95999c}.table-hover .table-dark:hover,.table-hover .table-dark:hover>td,.table-hover .table-dark:hover>th{background-color:#b9bbbe}.table-active,.table-active>td,.table-active>th,.table-hover .table-active:hover,.table-hover 
.table-active:hover>td,.table-hover .table-active:hover>th{background-color:rgba(0,0,0,.075)}.table .thead-dark th{color:#fff;background-color:#343a40;border-color:#454d55}.table .thead-light th{color:#495057;background-color:#e9ecef;border-color:#dee2e6}.table-dark{color:#fff;background-color:#343a40}.table-dark td,.table-dark th,.table-dark thead th{border-color:#454d55}.table-dark.table-bordered{border:0}.table-dark.table-striped tbody tr:nth-of-type(odd){background-color:hsla(0,0%,100%,.05)}.table-dark.table-hover tbody tr:hover{color:#fff;background-color:hsla(0,0%,100%,.075)}@media (max-width:539.98px){.table-responsive-sm{display:block;width:100%;overflow-x:auto;-webkit-overflow-scrolling:touch}.table-responsive-sm>.table-bordered{border:0}}@media (max-width:719.98px){.table-responsive-md{display:block;width:100%;overflow-x:auto;-webkit-overflow-scrolling:touch}.table-responsive-md>.table-bordered{border:0}}@media (max-width:959.98px){.table-responsive-lg{display:block;width:100%;overflow-x:auto;-webkit-overflow-scrolling:touch}.table-responsive-lg>.table-bordered{border:0}}@media (max-width:1199.98px){.table-responsive-xl{display:block;width:100%;overflow-x:auto;-webkit-overflow-scrolling:touch}.table-responsive-xl>.table-bordered{border:0}}.table-responsive{display:block;width:100%;overflow-x:auto;-webkit-overflow-scrolling:touch}.table-responsive>.table-bordered{border:0}.form-control{display:block;width:100%;height:calc(1.5em + .75rem + 2px);padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#495057;background-color:#fff;background-clip:padding-box;border:1px solid #ced4da;border-radius:.25rem;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media (prefers-reduced-motion:reduce){.form-control{transition:none}}.form-control::-ms-expand{background-color:transparent;border:0}.form-control:focus{color:#495057;background-color:#fff;border-color:#80bdff;outline:0;box-shadow:0 0 0 .2rem 
rgba(0,123,255,.25)}.form-control::placeholder{color:#6c757d;opacity:1}.form-control:disabled,.form-control[readonly]{background-color:#e9ecef;opacity:1}input[type=date].form-control,input[type=datetime-local].form-control,input[type=month].form-control,input[type=time].form-control{appearance:none}select.form-control:-moz-focusring{color:transparent;text-shadow:0 0 0 #495057}select.form-control:focus::-ms-value{color:#495057;background-color:#fff}.form-control-file,.form-control-range{display:block;width:100%}.col-form-label{padding-top:calc(.375rem + 1px);padding-bottom:calc(.375rem + 1px);margin-bottom:0;font-size:inherit;line-height:1.5}.col-form-label-lg{padding-top:calc(.5rem + 1px);padding-bottom:calc(.5rem + 1px);font-size:1.25rem;line-height:1.5}.col-form-label-sm{padding-top:calc(.25rem + 1px);padding-bottom:calc(.25rem + 1px);font-size:.875rem;line-height:1.5}.form-control-plaintext{display:block;width:100%;padding:.375rem 0;margin-bottom:0;font-size:1rem;line-height:1.5;color:#212529;background-color:transparent;border:solid transparent;border-width:1px 0}.form-control-plaintext.form-control-lg,.form-control-plaintext.form-control-sm{padding-right:0;padding-left:0}.form-control-sm{height:calc(1.5em + .5rem + 2px);padding:.25rem .5rem;font-size:.875rem;line-height:1.5;border-radius:.2rem}.form-control-lg{height:calc(1.5em + 1rem + 2px);padding:.5rem 
1rem;font-size:1.25rem;line-height:1.5;border-radius:.3rem}select.form-control[multiple],select.form-control[size],textarea.form-control{height:auto}.form-group{margin-bottom:1rem}.form-text{display:block;margin-top:.25rem}.form-row{display:flex;flex-wrap:wrap;margin-right:-5px;margin-left:-5px}.form-row>.col,.form-row>[class*=col-]{padding-right:5px;padding-left:5px}.form-check{position:relative;display:block;padding-left:1.25rem}.form-check-input{position:absolute;margin-top:.3rem;margin-left:-1.25rem}.form-check-input:disabled~.form-check-label,.form-check-input[disabled]~.form-check-label{color:#6c757d}.form-check-label{margin-bottom:0}.form-check-inline{display:inline-flex;align-items:center;padding-left:0;margin-right:.75rem}.form-check-inline .form-check-input{position:static;margin-top:0;margin-right:.3125rem;margin-left:0}.valid-feedback{display:none;width:100%;margin-top:.25rem;font-size:80%;color:#28a745}.valid-tooltip{position:absolute;top:100%;left:0;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:.875rem;line-height:1.5;color:#fff;background-color:rgba(40,167,69,.9);border-radius:.25rem}.form-row>.col>.valid-tooltip,.form-row>[class*=col-]>.valid-tooltip{left:5px}.is-valid~.valid-feedback,.is-valid~.valid-tooltip,.was-validated :valid~.valid-feedback,.was-validated :valid~.valid-tooltip{display:block}.form-control.is-valid,.was-validated .form-control:valid{border-color:#28a745;padding-right:calc(1.5em + .75rem)!important;background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='8' height='8'%3E%3Cpath fill='%2328a745' d='M2.3 6.73L.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right calc(.375em + .1875rem) center;background-size:calc(.75em + .375rem) calc(.75em + .375rem)}.form-control.is-valid:focus,.was-validated 
.form-control:valid:focus{border-color:#28a745;box-shadow:0 0 0 .2rem rgba(40,167,69,.25)}.was-validated select.form-control:valid,select.form-control.is-valid{padding-right:3rem!important;background-position:right 1.5rem center}.was-validated textarea.form-control:valid,textarea.form-control.is-valid{padding-right:calc(1.5em + .75rem);background-position:top calc(.375em + .1875rem) right calc(.375em + .1875rem)}.custom-select.is-valid,.was-validated .custom-select:valid{border-color:#28a745;padding-right:calc(.75em + 2.3125rem)!important;background:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='4' height='5'%3E%3Cpath fill='%23343a40' d='M2 0L0 2h4zm0 5L0 3h4z'/%3E%3C/svg%3E") right .75rem center/8px 10px no-repeat,#fff url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='8' height='8'%3E%3Cpath fill='%2328a745' d='M2.3 6.73L.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3E%3C/svg%3E") center right 1.75rem/calc(.75em + .375rem) calc(.75em + .375rem) no-repeat}.custom-select.is-valid:focus,.was-validated .custom-select:valid:focus{border-color:#28a745;box-shadow:0 0 0 .2rem rgba(40,167,69,.25)}.form-check-input.is-valid~.form-check-label,.was-validated .form-check-input:valid~.form-check-label{color:#28a745}.form-check-input.is-valid~.valid-feedback,.form-check-input.is-valid~.valid-tooltip,.was-validated .form-check-input:valid~.valid-feedback,.was-validated .form-check-input:valid~.valid-tooltip{display:block}.custom-control-input.is-valid~.custom-control-label,.was-validated .custom-control-input:valid~.custom-control-label{color:#28a745}.custom-control-input.is-valid~.custom-control-label:before,.was-validated .custom-control-input:valid~.custom-control-label:before{border-color:#28a745}.custom-control-input.is-valid:checked~.custom-control-label:before,.was-validated 
.custom-control-input:valid:checked~.custom-control-label:before{border-color:#34ce57;background-color:#34ce57}.custom-control-input.is-valid:focus~.custom-control-label:before,.was-validated .custom-control-input:valid:focus~.custom-control-label:before{box-shadow:0 0 0 .2rem rgba(40,167,69,.25)}.custom-control-input.is-valid:focus:not(:checked)~.custom-control-label:before,.custom-file-input.is-valid~.custom-file-label,.was-validated .custom-control-input:valid:focus:not(:checked)~.custom-control-label:before,.was-validated .custom-file-input:valid~.custom-file-label{border-color:#28a745}.custom-file-input.is-valid:focus~.custom-file-label,.was-validated .custom-file-input:valid:focus~.custom-file-label{border-color:#28a745;box-shadow:0 0 0 .2rem rgba(40,167,69,.25)}.invalid-feedback{display:none;width:100%;margin-top:.25rem;font-size:80%;color:#dc3545}.invalid-tooltip{position:absolute;top:100%;left:0;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:.875rem;line-height:1.5;color:#fff;background-color:rgba(220,53,69,.9);border-radius:.25rem}.form-row>.col>.invalid-tooltip,.form-row>[class*=col-]>.invalid-tooltip{left:5px}.is-invalid~.invalid-feedback,.is-invalid~.invalid-tooltip,.was-validated :invalid~.invalid-feedback,.was-validated :invalid~.invalid-tooltip{display:block}.form-control.is-invalid,.was-validated .form-control:invalid{border-color:#dc3545;padding-right:calc(1.5em + .75rem)!important;background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%23dc3545'%3E%3Ccircle cx='6' cy='6' r='4.5'/%3E%3Cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3E%3Ccircle cx='6' cy='8.2' r='.6' fill='%23dc3545' stroke='none'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right calc(.375em + .1875rem) center;background-size:calc(.75em + .375rem) calc(.75em + .375rem)}.form-control.is-invalid:focus,.was-validated 
.form-control:invalid:focus{border-color:#dc3545;box-shadow:0 0 0 .2rem rgba(220,53,69,.25)}.was-validated select.form-control:invalid,select.form-control.is-invalid{padding-right:3rem!important;background-position:right 1.5rem center}.was-validated textarea.form-control:invalid,textarea.form-control.is-invalid{padding-right:calc(1.5em + .75rem);background-position:top calc(.375em + .1875rem) right calc(.375em + .1875rem)}.custom-select.is-invalid,.was-validated .custom-select:invalid{border-color:#dc3545;padding-right:calc(.75em + 2.3125rem)!important;background:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='4' height='5'%3E%3Cpath fill='%23343a40' d='M2 0L0 2h4zm0 5L0 3h4z'/%3E%3C/svg%3E") right .75rem center/8px 10px no-repeat,#fff url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%23dc3545'%3E%3Ccircle cx='6' cy='6' r='4.5'/%3E%3Cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3E%3Ccircle cx='6' cy='8.2' r='.6' fill='%23dc3545' stroke='none'/%3E%3C/svg%3E") center right 1.75rem/calc(.75em + .375rem) calc(.75em + .375rem) no-repeat}.custom-select.is-invalid:focus,.was-validated .custom-select:invalid:focus{border-color:#dc3545;box-shadow:0 0 0 .2rem rgba(220,53,69,.25)}.form-check-input.is-invalid~.form-check-label,.was-validated .form-check-input:invalid~.form-check-label{color:#dc3545}.form-check-input.is-invalid~.invalid-feedback,.form-check-input.is-invalid~.invalid-tooltip,.was-validated .form-check-input:invalid~.invalid-feedback,.was-validated .form-check-input:invalid~.invalid-tooltip{display:block}.custom-control-input.is-invalid~.custom-control-label,.was-validated .custom-control-input:invalid~.custom-control-label{color:#dc3545}.custom-control-input.is-invalid~.custom-control-label:before,.was-validated 
.custom-control-input:invalid~.custom-control-label:before{border-color:#dc3545}.custom-control-input.is-invalid:checked~.custom-control-label:before,.was-validated .custom-control-input:invalid:checked~.custom-control-label:before{border-color:#e4606d;background-color:#e4606d}.custom-control-input.is-invalid:focus~.custom-control-label:before,.was-validated .custom-control-input:invalid:focus~.custom-control-label:before{box-shadow:0 0 0 .2rem rgba(220,53,69,.25)}.custom-control-input.is-invalid:focus:not(:checked)~.custom-control-label:before,.custom-file-input.is-invalid~.custom-file-label,.was-validated .custom-control-input:invalid:focus:not(:checked)~.custom-control-label:before,.was-validated .custom-file-input:invalid~.custom-file-label{border-color:#dc3545}.custom-file-input.is-invalid:focus~.custom-file-label,.was-validated .custom-file-input:invalid:focus~.custom-file-label{border-color:#dc3545;box-shadow:0 0 0 .2rem rgba(220,53,69,.25)}.form-inline{display:flex;flex-flow:row wrap;align-items:center}.form-inline .form-check{width:100%}@media (min-width:540px){.form-inline label{justify-content:center}.form-inline .form-group,.form-inline label{display:flex;align-items:center;margin-bottom:0}.form-inline .form-group{flex:0 0 auto;flex-flow:row wrap}.form-inline .form-control{display:inline-block;width:auto;vertical-align:middle}.form-inline .form-control-plaintext{display:inline-block}.form-inline .custom-select,.form-inline .input-group{width:auto}.form-inline .form-check{display:flex;align-items:center;justify-content:center;width:auto;padding-left:0}.form-inline .form-check-input{position:relative;flex-shrink:0;margin-top:0;margin-right:.25rem;margin-left:0}.form-inline .custom-control{align-items:center;justify-content:center}.form-inline .custom-control-label{margin-bottom:0}}.btn{display:inline-block;font-weight:400;color:#212529;text-align:center;vertical-align:middle;user-select:none;background-color:transparent;border:1px solid 
transparent;padding:.375rem .75rem;font-size:1rem;line-height:1.5;border-radius:.25rem;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media (prefers-reduced-motion:reduce){.btn{transition:none}}.btn:hover{color:#212529;text-decoration:none}.btn.focus,.btn:focus{outline:0;box-shadow:0 0 0 .2rem rgba(0,123,255,.25)}.btn.disabled,.btn:disabled{opacity:.65}.btn:not(:disabled):not(.disabled){cursor:pointer}a.btn.disabled,fieldset:disabled a.btn{pointer-events:none}.btn-primary{color:#fff;background-color:#007bff;border-color:#007bff}.btn-primary.focus,.btn-primary:focus,.btn-primary:hover{color:#fff;background-color:#0069d9;border-color:#0062cc}.btn-primary.focus,.btn-primary:focus{box-shadow:0 0 0 .2rem rgba(38,143,255,.5)}.btn-primary.disabled,.btn-primary:disabled{color:#fff;background-color:#007bff;border-color:#007bff}.btn-primary:not(:disabled):not(.disabled).active,.btn-primary:not(:disabled):not(.disabled):active,.show>.btn-primary.dropdown-toggle{color:#fff;background-color:#0062cc;border-color:#005cbf}.btn-primary:not(:disabled):not(.disabled).active:focus,.btn-primary:not(:disabled):not(.disabled):active:focus,.show>.btn-primary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(38,143,255,.5)}.btn-secondary{color:#fff;background-color:#6c757d;border-color:#6c757d}.btn-secondary.focus,.btn-secondary:focus,.btn-secondary:hover{color:#fff;background-color:#5a6268;border-color:#545b62}.btn-secondary.focus,.btn-secondary:focus{box-shadow:0 0 0 .2rem 
rgba(130,138,145,.5)}.btn-secondary.disabled,.btn-secondary:disabled{color:#fff;background-color:#6c757d;border-color:#6c757d}.btn-secondary:not(:disabled):not(.disabled).active,.btn-secondary:not(:disabled):not(.disabled):active,.show>.btn-secondary.dropdown-toggle{color:#fff;background-color:#545b62;border-color:#4e555b}.btn-secondary:not(:disabled):not(.disabled).active:focus,.btn-secondary:not(:disabled):not(.disabled):active:focus,.show>.btn-secondary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(130,138,145,.5)}.btn-success{color:#fff;background-color:#28a745;border-color:#28a745}.btn-success.focus,.btn-success:focus,.btn-success:hover{color:#fff;background-color:#218838;border-color:#1e7e34}.btn-success.focus,.btn-success:focus{box-shadow:0 0 0 .2rem rgba(72,180,97,.5)}.btn-success.disabled,.btn-success:disabled{color:#fff;background-color:#28a745;border-color:#28a745}.btn-success:not(:disabled):not(.disabled).active,.btn-success:not(:disabled):not(.disabled):active,.show>.btn-success.dropdown-toggle{color:#fff;background-color:#1e7e34;border-color:#1c7430}.btn-success:not(:disabled):not(.disabled).active:focus,.btn-success:not(:disabled):not(.disabled):active:focus,.show>.btn-success.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(72,180,97,.5)}.btn-info{color:#fff;background-color:#17a2b8;border-color:#17a2b8}.btn-info.focus,.btn-info:focus,.btn-info:hover{color:#fff;background-color:#138496;border-color:#117a8b}.btn-info.focus,.btn-info:focus{box-shadow:0 0 0 .2rem rgba(58,176,195,.5)}.btn-info.disabled,.btn-info:disabled{color:#fff;background-color:#17a2b8;border-color:#17a2b8}.btn-info:not(:disabled):not(.disabled).active,.btn-info:not(:disabled):not(.disabled):active,.show>.btn-info.dropdown-toggle{color:#fff;background-color:#117a8b;border-color:#10707f}.btn-info:not(:disabled):not(.disabled).active:focus,.btn-info:not(:disabled):not(.disabled):active:focus,.show>.btn-info.dropdown-toggle:focus{box-shadow:0 0 0 .2rem 
rgba(58,176,195,.5)}.btn-warning{color:#212529;background-color:#ffc107;border-color:#ffc107}.btn-warning.focus,.btn-warning:focus,.btn-warning:hover{color:#212529;background-color:#e0a800;border-color:#d39e00}.btn-warning.focus,.btn-warning:focus{box-shadow:0 0 0 .2rem rgba(222,170,12,.5)}.btn-warning.disabled,.btn-warning:disabled{color:#212529;background-color:#ffc107;border-color:#ffc107}.btn-warning:not(:disabled):not(.disabled).active,.btn-warning:not(:disabled):not(.disabled):active,.show>.btn-warning.dropdown-toggle{color:#212529;background-color:#d39e00;border-color:#c69500}.btn-warning:not(:disabled):not(.disabled).active:focus,.btn-warning:not(:disabled):not(.disabled):active:focus,.show>.btn-warning.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(222,170,12,.5)}.btn-danger{color:#fff;background-color:#dc3545;border-color:#dc3545}.btn-danger.focus,.btn-danger:focus,.btn-danger:hover{color:#fff;background-color:#c82333;border-color:#bd2130}.btn-danger.focus,.btn-danger:focus{box-shadow:0 0 0 .2rem rgba(225,83,97,.5)}.btn-danger.disabled,.btn-danger:disabled{color:#fff;background-color:#dc3545;border-color:#dc3545}.btn-danger:not(:disabled):not(.disabled).active,.btn-danger:not(:disabled):not(.disabled):active,.show>.btn-danger.dropdown-toggle{color:#fff;background-color:#bd2130;border-color:#b21f2d}.btn-danger:not(:disabled):not(.disabled).active:focus,.btn-danger:not(:disabled):not(.disabled):active:focus,.show>.btn-danger.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(225,83,97,.5)}.btn-light{color:#212529;background-color:#f8f9fa;border-color:#f8f9fa}.btn-light.focus,.btn-light:focus,.btn-light:hover{color:#212529;background-color:#e2e6ea;border-color:#dae0e5}.btn-light.focus,.btn-light:focus{box-shadow:0 0 0 .2rem 
rgba(216,217,219,.5)}.btn-light.disabled,.btn-light:disabled{color:#212529;background-color:#f8f9fa;border-color:#f8f9fa}.btn-light:not(:disabled):not(.disabled).active,.btn-light:not(:disabled):not(.disabled):active,.show>.btn-light.dropdown-toggle{color:#212529;background-color:#dae0e5;border-color:#d3d9df}.btn-light:not(:disabled):not(.disabled).active:focus,.btn-light:not(:disabled):not(.disabled):active:focus,.show>.btn-light.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(216,217,219,.5)}.btn-dark{color:#fff;background-color:#343a40;border-color:#343a40}.btn-dark.focus,.btn-dark:focus,.btn-dark:hover{color:#fff;background-color:#23272b;border-color:#1d2124}.btn-dark.focus,.btn-dark:focus{box-shadow:0 0 0 .2rem rgba(82,88,93,.5)}.btn-dark.disabled,.btn-dark:disabled{color:#fff;background-color:#343a40;border-color:#343a40}.btn-dark:not(:disabled):not(.disabled).active,.btn-dark:not(:disabled):not(.disabled):active,.show>.btn-dark.dropdown-toggle{color:#fff;background-color:#1d2124;border-color:#171a1d}.btn-dark:not(:disabled):not(.disabled).active:focus,.btn-dark:not(:disabled):not(.disabled):active:focus,.show>.btn-dark.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(82,88,93,.5)}.btn-outline-primary{color:#007bff;border-color:#007bff}.btn-outline-primary:hover{color:#fff;background-color:#007bff;border-color:#007bff}.btn-outline-primary.focus,.btn-outline-primary:focus{box-shadow:0 0 0 .2rem rgba(0,123,255,.5)}.btn-outline-primary.disabled,.btn-outline-primary:disabled{color:#007bff;background-color:transparent}.btn-outline-primary:not(:disabled):not(.disabled).active,.btn-outline-primary:not(:disabled):not(.disabled):active,.show>.btn-outline-primary.dropdown-toggle{color:#fff;background-color:#007bff;border-color:#007bff}.btn-outline-primary:not(:disabled):not(.disabled).active:focus,.btn-outline-primary:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-primary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem 
rgba(0,123,255,.5)}.btn-outline-secondary{color:#6c757d;border-color:#6c757d}.btn-outline-secondary:hover{color:#fff;background-color:#6c757d;border-color:#6c757d}.btn-outline-secondary.focus,.btn-outline-secondary:focus{box-shadow:0 0 0 .2rem rgba(108,117,125,.5)}.btn-outline-secondary.disabled,.btn-outline-secondary:disabled{color:#6c757d;background-color:transparent}.btn-outline-secondary:not(:disabled):not(.disabled).active,.btn-outline-secondary:not(:disabled):not(.disabled):active,.show>.btn-outline-secondary.dropdown-toggle{color:#fff;background-color:#6c757d;border-color:#6c757d}.btn-outline-secondary:not(:disabled):not(.disabled).active:focus,.btn-outline-secondary:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-secondary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(108,117,125,.5)}.btn-outline-success{color:#28a745;border-color:#28a745}.btn-outline-success:hover{color:#fff;background-color:#28a745;border-color:#28a745}.btn-outline-success.focus,.btn-outline-success:focus{box-shadow:0 0 0 .2rem rgba(40,167,69,.5)}.btn-outline-success.disabled,.btn-outline-success:disabled{color:#28a745;background-color:transparent}.btn-outline-success:not(:disabled):not(.disabled).active,.btn-outline-success:not(:disabled):not(.disabled):active,.show>.btn-outline-success.dropdown-toggle{color:#fff;background-color:#28a745;border-color:#28a745}.btn-outline-success:not(:disabled):not(.disabled).active:focus,.btn-outline-success:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-success.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(40,167,69,.5)}.btn-outline-info{color:#17a2b8;border-color:#17a2b8}.btn-outline-info:hover{color:#fff;background-color:#17a2b8;border-color:#17a2b8}.btn-outline-info.focus,.btn-outline-info:focus{box-shadow:0 0 0 .2rem 
rgba(23,162,184,.5)}.btn-outline-info.disabled,.btn-outline-info:disabled{color:#17a2b8;background-color:transparent}.btn-outline-info:not(:disabled):not(.disabled).active,.btn-outline-info:not(:disabled):not(.disabled):active,.show>.btn-outline-info.dropdown-toggle{color:#fff;background-color:#17a2b8;border-color:#17a2b8}.btn-outline-info:not(:disabled):not(.disabled).active:focus,.btn-outline-info:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-info.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(23,162,184,.5)}.btn-outline-warning{color:#ffc107;border-color:#ffc107}.btn-outline-warning:hover{color:#212529;background-color:#ffc107;border-color:#ffc107}.btn-outline-warning.focus,.btn-outline-warning:focus{box-shadow:0 0 0 .2rem rgba(255,193,7,.5)}.btn-outline-warning.disabled,.btn-outline-warning:disabled{color:#ffc107;background-color:transparent}.btn-outline-warning:not(:disabled):not(.disabled).active,.btn-outline-warning:not(:disabled):not(.disabled):active,.show>.btn-outline-warning.dropdown-toggle{color:#212529;background-color:#ffc107;border-color:#ffc107}.btn-outline-warning:not(:disabled):not(.disabled).active:focus,.btn-outline-warning:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-warning.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(255,193,7,.5)}.btn-outline-danger{color:#dc3545;border-color:#dc3545}.btn-outline-danger:hover{color:#fff;background-color:#dc3545;border-color:#dc3545}.btn-outline-danger.focus,.btn-outline-danger:focus{box-shadow:0 0 0 .2rem 
rgba(220,53,69,.5)}.btn-outline-danger.disabled,.btn-outline-danger:disabled{color:#dc3545;background-color:transparent}.btn-outline-danger:not(:disabled):not(.disabled).active,.btn-outline-danger:not(:disabled):not(.disabled):active,.show>.btn-outline-danger.dropdown-toggle{color:#fff;background-color:#dc3545;border-color:#dc3545}.btn-outline-danger:not(:disabled):not(.disabled).active:focus,.btn-outline-danger:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-danger.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(220,53,69,.5)}.btn-outline-light{color:#f8f9fa;border-color:#f8f9fa}.btn-outline-light:hover{color:#212529;background-color:#f8f9fa;border-color:#f8f9fa}.btn-outline-light.focus,.btn-outline-light:focus{box-shadow:0 0 0 .2rem rgba(248,249,250,.5)}.btn-outline-light.disabled,.btn-outline-light:disabled{color:#f8f9fa;background-color:transparent}.btn-outline-light:not(:disabled):not(.disabled).active,.btn-outline-light:not(:disabled):not(.disabled):active,.show>.btn-outline-light.dropdown-toggle{color:#212529;background-color:#f8f9fa;border-color:#f8f9fa}.btn-outline-light:not(:disabled):not(.disabled).active:focus,.btn-outline-light:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-light.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(248,249,250,.5)}.btn-outline-dark{color:#343a40;border-color:#343a40}.btn-outline-dark:hover{color:#fff;background-color:#343a40;border-color:#343a40}.btn-outline-dark.focus,.btn-outline-dark:focus{box-shadow:0 0 0 .2rem 
rgba(52,58,64,.5)}.btn-outline-dark.disabled,.btn-outline-dark:disabled{color:#343a40;background-color:transparent}.btn-outline-dark:not(:disabled):not(.disabled).active,.btn-outline-dark:not(:disabled):not(.disabled):active,.show>.btn-outline-dark.dropdown-toggle{color:#fff;background-color:#343a40;border-color:#343a40}.btn-outline-dark:not(:disabled):not(.disabled).active:focus,.btn-outline-dark:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-dark.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(52,58,64,.5)}.btn-link{font-weight:400;color:#007bff;text-decoration:none}.btn-link:hover{color:#0056b3}.btn-link.focus,.btn-link:focus,.btn-link:hover{text-decoration:underline}.btn-link.disabled,.btn-link:disabled{color:#6c757d;pointer-events:none}.btn-group-lg>.btn,.btn-lg{padding:.5rem 1rem;font-size:1.25rem;line-height:1.5;border-radius:.3rem}.btn-group-sm>.btn,.btn-sm{padding:.25rem .5rem;font-size:.875rem;line-height:1.5;border-radius:.2rem}.btn-block{display:block;width:100%}.btn-block+.btn-block{margin-top:.5rem}input[type=button].btn-block,input[type=reset].btn-block,input[type=submit].btn-block{width:100%}.fade{transition:opacity .15s linear}@media (prefers-reduced-motion:reduce){.fade{transition:none}}.fade:not(.show){opacity:0}.collapse:not(.show){display:none}.collapsing{position:relative;height:0;overflow:hidden;transition:height .35s ease}@media (prefers-reduced-motion:reduce){.collapsing{transition:none}}.dropdown,.dropleft,.dropright,.dropup{position:relative}.dropdown-toggle{white-space:nowrap}.dropdown-toggle:after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid;border-right:.3em solid transparent;border-bottom:0;border-left:.3em solid transparent}.dropdown-toggle:empty:after{margin-left:0}.dropdown-menu{position:absolute;top:100%;left:0;z-index:1000;display:none;float:left;min-width:10rem;padding:.5rem 0;margin:.125rem 0 
0;font-size:1rem;color:#212529;text-align:left;list-style:none;background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,0,0,.15);border-radius:.25rem}.dropdown-menu-left{right:auto;left:0}.dropdown-menu-right{right:0;left:auto}@media (min-width:540px){.dropdown-menu-sm-left{right:auto;left:0}.dropdown-menu-sm-right{right:0;left:auto}}@media (min-width:720px){.dropdown-menu-md-left{right:auto;left:0}.dropdown-menu-md-right{right:0;left:auto}}@media (min-width:960px){.dropdown-menu-lg-left{right:auto;left:0}.dropdown-menu-lg-right{right:0;left:auto}}@media (min-width:1200px){.dropdown-menu-xl-left{right:auto;left:0}.dropdown-menu-xl-right{right:0;left:auto}}.dropup .dropdown-menu{top:auto;bottom:100%;margin-top:0;margin-bottom:.125rem}.dropup .dropdown-toggle:after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:0;border-right:.3em solid transparent;border-bottom:.3em solid;border-left:.3em solid transparent}.dropup .dropdown-toggle:empty:after{margin-left:0}.dropright .dropdown-menu{top:0;right:auto;left:100%;margin-top:0;margin-left:.125rem}.dropright .dropdown-toggle:after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid transparent;border-right:0;border-bottom:.3em solid transparent;border-left:.3em solid}.dropright .dropdown-toggle:empty:after{margin-left:0}.dropright .dropdown-toggle:after{vertical-align:0}.dropleft .dropdown-menu{top:0;right:100%;left:auto;margin-top:0;margin-right:.125rem}.dropleft .dropdown-toggle:after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";display:none}.dropleft .dropdown-toggle:before{display:inline-block;margin-right:.255em;vertical-align:.255em;content:"";border-top:.3em solid transparent;border-right:.3em solid;border-bottom:.3em solid transparent}.dropleft .dropdown-toggle:empty:after{margin-left:0}.dropleft 
.dropdown-toggle:before{vertical-align:0}.dropdown-menu[x-placement^=bottom],.dropdown-menu[x-placement^=left],.dropdown-menu[x-placement^=right],.dropdown-menu[x-placement^=top]{right:auto;bottom:auto}.dropdown-divider{height:0;margin:.5rem 0;overflow:hidden;border-top:1px solid #e9ecef}.dropdown-item{display:block;width:100%;padding:.25rem 1.5rem;clear:both;font-weight:400;color:#212529;text-align:inherit;white-space:nowrap;background-color:transparent;border:0}.dropdown-item:focus,.dropdown-item:hover{color:#16181b;text-decoration:none;background-color:#e9ecef}.dropdown-item.active,.dropdown-item:active{color:#fff;text-decoration:none;background-color:#007bff}.dropdown-item.disabled,.dropdown-item:disabled{color:#adb5bd;pointer-events:none;background-color:transparent}.dropdown-menu.show{display:block}.dropdown-header{display:block;padding:.5rem 1.5rem;margin-bottom:0;font-size:.875rem;color:#6c757d;white-space:nowrap}.dropdown-item-text{display:block;padding:.25rem 1.5rem;color:#212529}.btn-group,.btn-group-vertical{position:relative;display:inline-flex;vertical-align:middle}.btn-group-vertical>.btn,.btn-group>.btn{position:relative;flex:1 1 auto}.btn-group-vertical>.btn.active,.btn-group-vertical>.btn:active,.btn-group-vertical>.btn:focus,.btn-group-vertical>.btn:hover,.btn-group>.btn.active,.btn-group>.btn:active,.btn-group>.btn:focus,.btn-group>.btn:hover{z-index:1}.btn-toolbar{display:flex;flex-wrap:wrap;justify-content:flex-start}.btn-toolbar .input-group{width:auto}.btn-group>.btn-group:not(:first-child),.btn-group>.btn:not(:first-child){margin-left:-1px}.btn-group>.btn-group:not(:last-child)>.btn,.btn-group>.btn:not(:last-child):not(.dropdown-toggle){border-top-right-radius:0;border-bottom-right-radius:0}.btn-group>.btn-group:not(:first-child)>.btn,.btn-group>.btn:not(:first-child){border-top-left-radius:0;border-bottom-left-radius:0}.dropdown-toggle-split{padding-right:.5625rem;padding-left:.5625rem}.dropdown-toggle-split:after,.dropright 
.dropdown-toggle-split:after,.dropup .dropdown-toggle-split:after{margin-left:0}.dropleft .dropdown-toggle-split:before{margin-right:0}.btn-group-sm>.btn+.dropdown-toggle-split,.btn-sm+.dropdown-toggle-split{padding-right:.375rem;padding-left:.375rem}.btn-group-lg>.btn+.dropdown-toggle-split,.btn-lg+.dropdown-toggle-split{padding-right:.75rem;padding-left:.75rem}.btn-group-vertical{flex-direction:column;align-items:flex-start;justify-content:center}.btn-group-vertical>.btn,.btn-group-vertical>.btn-group{width:100%}.btn-group-vertical>.btn-group:not(:first-child),.btn-group-vertical>.btn:not(:first-child){margin-top:-1px}.btn-group-vertical>.btn-group:not(:last-child)>.btn,.btn-group-vertical>.btn:not(:last-child):not(.dropdown-toggle){border-bottom-right-radius:0;border-bottom-left-radius:0}.btn-group-vertical>.btn-group:not(:first-child)>.btn,.btn-group-vertical>.btn:not(:first-child){border-top-left-radius:0;border-top-right-radius:0}.btn-group-toggle>.btn,.btn-group-toggle>.btn-group>.btn{margin-bottom:0}.btn-group-toggle>.btn-group>.btn input[type=checkbox],.btn-group-toggle>.btn-group>.btn input[type=radio],.btn-group-toggle>.btn input[type=checkbox],.btn-group-toggle>.btn input[type=radio]{position:absolute;clip:rect(0,0,0,0);pointer-events:none}.input-group{position:relative;display:flex;flex-wrap:wrap;align-items:stretch;width:100%}.input-group>.custom-file,.input-group>.custom-select,.input-group>.form-control,.input-group>.form-control-plaintext{position:relative;flex:1 1 
auto;width:1%;min-width:0;margin-bottom:0}.input-group>.custom-file+.custom-file,.input-group>.custom-file+.custom-select,.input-group>.custom-file+.form-control,.input-group>.custom-select+.custom-file,.input-group>.custom-select+.custom-select,.input-group>.custom-select+.form-control,.input-group>.form-control+.custom-file,.input-group>.form-control+.custom-select,.input-group>.form-control+.form-control,.input-group>.form-control-plaintext+.custom-file,.input-group>.form-control-plaintext+.custom-select,.input-group>.form-control-plaintext+.form-control{margin-left:-1px}.input-group>.custom-file .custom-file-input:focus~.custom-file-label,.input-group>.custom-select:focus,.input-group>.form-control:focus{z-index:3}.input-group>.custom-file .custom-file-input:focus{z-index:4}.input-group>.custom-select:not(:first-child),.input-group>.form-control:not(:first-child){border-top-left-radius:0;border-bottom-left-radius:0}.input-group>.custom-file{display:flex;align-items:center}.input-group>.custom-file:not(:last-child) .custom-file-label,.input-group>.custom-file:not(:last-child) .custom-file-label:after{border-top-right-radius:0;border-bottom-right-radius:0}.input-group>.custom-file:not(:first-child) .custom-file-label{border-top-left-radius:0;border-bottom-left-radius:0}.input-group.has-validation>.custom-file:nth-last-child(n+3) .custom-file-label,.input-group.has-validation>.custom-file:nth-last-child(n+3) .custom-file-label:after,.input-group.has-validation>.custom-select:nth-last-child(n+3),.input-group.has-validation>.form-control:nth-last-child(n+3),.input-group:not(.has-validation)>.custom-file:not(:last-child) .custom-file-label,.input-group:not(.has-validation)>.custom-file:not(:last-child) 
.custom-file-label:after,.input-group:not(.has-validation)>.custom-select:not(:last-child),.input-group:not(.has-validation)>.form-control:not(:last-child){border-top-right-radius:0;border-bottom-right-radius:0}.input-group-append,.input-group-prepend{display:flex}.input-group-append .btn,.input-group-prepend .btn{position:relative;z-index:2}.input-group-append .btn:focus,.input-group-prepend .btn:focus{z-index:3}.input-group-append .btn+.btn,.input-group-append .btn+.input-group-text,.input-group-append .input-group-text+.btn,.input-group-append .input-group-text+.input-group-text,.input-group-prepend .btn+.btn,.input-group-prepend .btn+.input-group-text,.input-group-prepend .input-group-text+.btn,.input-group-prepend .input-group-text+.input-group-text{margin-left:-1px}.input-group-prepend{margin-right:-1px}.input-group-append{margin-left:-1px}.input-group-text{display:flex;align-items:center;padding:.375rem .75rem;margin-bottom:0;font-size:1rem;font-weight:400;line-height:1.5;color:#495057;text-align:center;white-space:nowrap;background-color:#e9ecef;border:1px solid #ced4da;border-radius:.25rem}.input-group-text input[type=checkbox],.input-group-text input[type=radio]{margin-top:0}.input-group-lg>.custom-select,.input-group-lg>.form-control:not(textarea){height:calc(1.5em + 1rem + 2px)}.input-group-lg>.custom-select,.input-group-lg>.form-control,.input-group-lg>.input-group-append>.btn,.input-group-lg>.input-group-append>.input-group-text,.input-group-lg>.input-group-prepend>.btn,.input-group-lg>.input-group-prepend>.input-group-text{padding:.5rem 1rem;font-size:1.25rem;line-height:1.5;border-radius:.3rem}.input-group-sm>.custom-select,.input-group-sm>.form-control:not(textarea){height:calc(1.5em + .5rem + 
2px)}.input-group-sm>.custom-select,.input-group-sm>.form-control,.input-group-sm>.input-group-append>.btn,.input-group-sm>.input-group-append>.input-group-text,.input-group-sm>.input-group-prepend>.btn,.input-group-sm>.input-group-prepend>.input-group-text{padding:.25rem .5rem;font-size:.875rem;line-height:1.5;border-radius:.2rem}.input-group-lg>.custom-select,.input-group-sm>.custom-select{padding-right:1.75rem}.input-group.has-validation>.input-group-append:nth-last-child(n+3)>.btn,.input-group.has-validation>.input-group-append:nth-last-child(n+3)>.input-group-text,.input-group:not(.has-validation)>.input-group-append:not(:last-child)>.btn,.input-group:not(.has-validation)>.input-group-append:not(:last-child)>.input-group-text,.input-group>.input-group-append:last-child>.btn:not(:last-child):not(.dropdown-toggle),.input-group>.input-group-append:last-child>.input-group-text:not(:last-child),.input-group>.input-group-prepend>.btn,.input-group>.input-group-prepend>.input-group-text{border-top-right-radius:0;border-bottom-right-radius:0}.input-group>.input-group-append>.btn,.input-group>.input-group-append>.input-group-text,.input-group>.input-group-prepend:first-child>.btn:not(:first-child),.input-group>.input-group-prepend:first-child>.input-group-text:not(:first-child),.input-group>.input-group-prepend:not(:first-child)>.btn,.input-group>.input-group-prepend:not(:first-child)>.input-group-text{border-top-left-radius:0;border-bottom-left-radius:0}.custom-control{position:relative;z-index:1;display:block;min-height:1.5rem;padding-left:1.5rem;color-adjust:exact}.custom-control-inline{display:inline-flex;margin-right:1rem}.custom-control-input{position:absolute;left:0;z-index:-1;width:1rem;height:1.25rem;opacity:0}.custom-control-input:checked~.custom-control-label:before{color:#fff;border-color:#007bff;background-color:#007bff}.custom-control-input:focus~.custom-control-label:before{box-shadow:0 0 0 .2rem 
rgba(0,123,255,.25)}.custom-control-input:focus:not(:checked)~.custom-control-label:before{border-color:#80bdff}.custom-control-input:not(:disabled):active~.custom-control-label:before{color:#fff;background-color:#b3d7ff;border-color:#b3d7ff}.custom-control-input:disabled~.custom-control-label,.custom-control-input[disabled]~.custom-control-label{color:#6c757d}.custom-control-input:disabled~.custom-control-label:before,.custom-control-input[disabled]~.custom-control-label:before{background-color:#e9ecef}.custom-control-label{position:relative;margin-bottom:0;vertical-align:top}.custom-control-label:before{pointer-events:none;background-color:#fff;border:1px solid #adb5bd}.custom-control-label:after,.custom-control-label:before{position:absolute;top:.25rem;left:-1.5rem;display:block;width:1rem;height:1rem;content:""}.custom-control-label:after{background:50%/50% 50% no-repeat}.custom-checkbox .custom-control-label:before{border-radius:.25rem}.custom-checkbox .custom-control-input:checked~.custom-control-label:after{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='8' height='8'%3E%3Cpath fill='%23fff' d='M6.564.75l-3.59 3.612-1.538-1.55L0 4.26l2.974 2.99L8 2.193z'/%3E%3C/svg%3E")}.custom-checkbox .custom-control-input:indeterminate~.custom-control-label:before{border-color:#007bff;background-color:#007bff}.custom-checkbox .custom-control-input:indeterminate~.custom-control-label:after{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='4' height='4'%3E%3Cpath stroke='%23fff' d='M0 2h4'/%3E%3C/svg%3E")}.custom-checkbox .custom-control-input:disabled:checked~.custom-control-label:before{background-color:rgba(0,123,255,.5)}.custom-checkbox .custom-control-input:disabled:indeterminate~.custom-control-label:before{background-color:rgba(0,123,255,.5)}.custom-radio .custom-control-label:before{border-radius:50%}.custom-radio 
.custom-control-input:checked~.custom-control-label:after{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='-4 -4 8 8'%3E%3Ccircle r='3' fill='%23fff'/%3E%3C/svg%3E")}.custom-radio .custom-control-input:disabled:checked~.custom-control-label:before{background-color:rgba(0,123,255,.5)}.custom-switch{padding-left:2.25rem}.custom-switch .custom-control-label:before{left:-2.25rem;width:1.75rem;pointer-events:all;border-radius:.5rem}.custom-switch .custom-control-label:after{top:calc(.25rem + 2px);left:calc(-2.25rem + 2px);width:calc(1rem - 4px);height:calc(1rem - 4px);background-color:#adb5bd;border-radius:.5rem;transition:transform .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media (prefers-reduced-motion:reduce){.custom-switch .custom-control-label:after{transition:none}}.custom-switch .custom-control-input:checked~.custom-control-label:after{background-color:#fff;transform:translateX(.75rem)}.custom-switch .custom-control-input:disabled:checked~.custom-control-label:before{background-color:rgba(0,123,255,.5)}.custom-select{display:inline-block;width:100%;height:calc(1.5em + .75rem + 2px);padding:.375rem 1.75rem .375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#495057;vertical-align:middle;background:#fff url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='4' height='5'%3E%3Cpath fill='%23343a40' d='M2 0L0 2h4zm0 5L0 3h4z'/%3E%3C/svg%3E") right .75rem center/8px 10px no-repeat;border:1px solid #ced4da;border-radius:.25rem;appearance:none}.custom-select:focus{border-color:#80bdff;outline:0;box-shadow:0 0 0 .2rem 
rgba(0,123,255,.25)}.custom-select:focus::-ms-value{color:#495057;background-color:#fff}.custom-select[multiple],.custom-select[size]:not([size="1"]){height:auto;padding-right:.75rem;background-image:none}.custom-select:disabled{color:#6c757d;background-color:#e9ecef}.custom-select::-ms-expand{display:none}.custom-select:-moz-focusring{color:transparent;text-shadow:0 0 0 #495057}.custom-select-sm{height:calc(1.5em + .5rem + 2px);padding-top:.25rem;padding-bottom:.25rem;padding-left:.5rem;font-size:.875rem}.custom-select-lg{height:calc(1.5em + 1rem + 2px);padding-top:.5rem;padding-bottom:.5rem;padding-left:1rem;font-size:1.25rem}.custom-file{display:inline-block;margin-bottom:0}.custom-file,.custom-file-input{position:relative;width:100%;height:calc(1.5em + .75rem + 2px)}.custom-file-input{z-index:2;margin:0;overflow:hidden;opacity:0}.custom-file-input:focus~.custom-file-label{border-color:#80bdff;box-shadow:0 0 0 .2rem rgba(0,123,255,.25)}.custom-file-input:disabled~.custom-file-label,.custom-file-input[disabled]~.custom-file-label{background-color:#e9ecef}.custom-file-input:lang(en)~.custom-file-label:after{content:"Browse"}.custom-file-input~.custom-file-label[data-browse]:after{content:attr(data-browse)}.custom-file-label{left:0;z-index:1;height:calc(1.5em + .75rem + 2px);overflow:hidden;font-weight:400;background-color:#fff;border:1px solid #ced4da;border-radius:.25rem}.custom-file-label,.custom-file-label:after{position:absolute;top:0;right:0;padding:.375rem .75rem;line-height:1.5;color:#495057}.custom-file-label:after{bottom:0;z-index:3;display:block;height:calc(1.5em + .75rem);content:"Browse";background-color:#e9ecef;border-left:inherit;border-radius:0 .25rem .25rem 0}.custom-range{width:100%;height:1.4rem;padding:0;background-color:transparent;appearance:none}.custom-range:focus{outline:0}.custom-range:focus::-webkit-slider-thumb{box-shadow:0 0 0 1px #fff,0 0 0 .2rem rgba(0,123,255,.25)}.custom-range:focus::-moz-range-thumb{box-shadow:0 0 0 1px #fff,0 0 0 
.2rem rgba(0,123,255,.25)}.custom-range:focus::-ms-thumb{box-shadow:0 0 0 1px #fff,0 0 0 .2rem rgba(0,123,255,.25)}.custom-range::-moz-focus-outer{border:0}.custom-range::-webkit-slider-thumb{width:1rem;height:1rem;margin-top:-.25rem;background-color:#007bff;border:0;border-radius:1rem;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none}@media (prefers-reduced-motion:reduce){.custom-range::-webkit-slider-thumb{transition:none}}.custom-range::-webkit-slider-thumb:active{background-color:#b3d7ff}.custom-range::-webkit-slider-runnable-track{width:100%;height:.5rem;color:transparent;cursor:pointer;background-color:#dee2e6;border-color:transparent;border-radius:1rem}.custom-range::-moz-range-thumb{width:1rem;height:1rem;background-color:#007bff;border:0;border-radius:1rem;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none}@media (prefers-reduced-motion:reduce){.custom-range::-moz-range-thumb{transition:none}}.custom-range::-moz-range-thumb:active{background-color:#b3d7ff}.custom-range::-moz-range-track{width:100%;height:.5rem;color:transparent;cursor:pointer;background-color:#dee2e6;border-color:transparent;border-radius:1rem}.custom-range::-ms-thumb{width:1rem;height:1rem;margin-top:0;margin-right:.2rem;margin-left:.2rem;background-color:#007bff;border:0;border-radius:1rem;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none}@media 
(prefers-reduced-motion:reduce){.custom-range::-ms-thumb{transition:none}}.custom-range::-ms-thumb:active{background-color:#b3d7ff}.custom-range::-ms-track{width:100%;height:.5rem;color:transparent;cursor:pointer;background-color:transparent;border-color:transparent;border-width:.5rem}.custom-range::-ms-fill-lower,.custom-range::-ms-fill-upper{background-color:#dee2e6;border-radius:1rem}.custom-range::-ms-fill-upper{margin-right:15px}.custom-range:disabled::-webkit-slider-thumb{background-color:#adb5bd}.custom-range:disabled::-webkit-slider-runnable-track{cursor:default}.custom-range:disabled::-moz-range-thumb{background-color:#adb5bd}.custom-range:disabled::-moz-range-track{cursor:default}.custom-range:disabled::-ms-thumb{background-color:#adb5bd}.custom-control-label:before,.custom-file-label,.custom-select{transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media (prefers-reduced-motion:reduce){.custom-control-label:before,.custom-file-label,.custom-select{transition:none}}.nav{display:flex;flex-wrap:wrap;padding-left:0;margin-bottom:0;list-style:none}.nav-link{display:block;padding:.5rem 1rem}.nav-link:focus,.nav-link:hover{text-decoration:none}.nav-link.disabled{color:#6c757d;pointer-events:none;cursor:default}.nav-tabs{border-bottom:1px solid #dee2e6}.nav-tabs .nav-link{margin-bottom:-1px;border:1px solid transparent;border-top-left-radius:.25rem;border-top-right-radius:.25rem}.nav-tabs .nav-link:focus,.nav-tabs .nav-link:hover{border-color:#e9ecef #e9ecef #dee2e6}.nav-tabs .nav-link.disabled{color:#6c757d;background-color:transparent;border-color:transparent}.nav-tabs .nav-item.show .nav-link,.nav-tabs .nav-link.active{color:#495057;background-color:#fff;border-color:#dee2e6 #dee2e6 #fff}.nav-tabs .dropdown-menu{margin-top:-1px;border-top-left-radius:0;border-top-right-radius:0}.nav-pills .nav-link{border-radius:.25rem}.nav-pills .nav-link.active,.nav-pills 
.show>.nav-link{color:#fff;background-color:#007bff}.nav-fill .nav-item,.nav-fill>.nav-link{flex:1 1 auto;text-align:center}.nav-justified .nav-item,.nav-justified>.nav-link{flex-basis:0;flex-grow:1;text-align:center}.tab-content>.tab-pane{display:none}.tab-content>.active{display:block}.navbar{position:relative;padding:.5rem 1rem}.navbar,.navbar .container,.navbar .container-fluid,.navbar .container-lg,.navbar .container-md,.navbar .container-sm,.navbar .container-xl{display:flex;flex-wrap:wrap;align-items:center;justify-content:space-between}.navbar-brand{display:inline-block;padding-top:.3125rem;padding-bottom:.3125rem;margin-right:1rem;font-size:1.25rem;line-height:inherit;white-space:nowrap}.navbar-brand:focus,.navbar-brand:hover{text-decoration:none}.navbar-nav{display:flex;flex-direction:column;padding-left:0;margin-bottom:0;list-style:none}.navbar-nav .nav-link{padding-right:0;padding-left:0}.navbar-nav .dropdown-menu{position:static;float:none}.navbar-text{display:inline-block;padding-top:.5rem;padding-bottom:.5rem}.navbar-collapse{flex-basis:100%;flex-grow:1;align-items:center}.navbar-toggler{padding:.25rem .75rem;font-size:1.25rem;line-height:1;background-color:transparent;border:1px solid transparent;border-radius:.25rem}.navbar-toggler:focus,.navbar-toggler:hover{text-decoration:none}.navbar-toggler-icon{display:inline-block;width:1.5em;height:1.5em;vertical-align:middle;content:"";background:50%/100% 100% no-repeat}.navbar-nav-scroll{max-height:75vh;overflow-y:auto}@media (max-width:539.98px){.navbar-expand-sm>.container,.navbar-expand-sm>.container-fluid,.navbar-expand-sm>.container-lg,.navbar-expand-sm>.container-md,.navbar-expand-sm>.container-sm,.navbar-expand-sm>.container-xl{padding-right:0;padding-left:0}}@media (min-width:540px){.navbar-expand-sm{flex-flow:row nowrap;justify-content:flex-start}.navbar-expand-sm .navbar-nav{flex-direction:row}.navbar-expand-sm .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-sm .navbar-nav 
.nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-sm>.container,.navbar-expand-sm>.container-fluid,.navbar-expand-sm>.container-lg,.navbar-expand-sm>.container-md,.navbar-expand-sm>.container-sm,.navbar-expand-sm>.container-xl{flex-wrap:nowrap}.navbar-expand-sm .navbar-nav-scroll{overflow:visible}.navbar-expand-sm .navbar-collapse{display:flex!important;flex-basis:auto}.navbar-expand-sm .navbar-toggler{display:none}}@media (max-width:719.98px){.navbar-expand-md>.container,.navbar-expand-md>.container-fluid,.navbar-expand-md>.container-lg,.navbar-expand-md>.container-md,.navbar-expand-md>.container-sm,.navbar-expand-md>.container-xl{padding-right:0;padding-left:0}}@media (min-width:720px){.navbar-expand-md{flex-flow:row nowrap;justify-content:flex-start}.navbar-expand-md .navbar-nav{flex-direction:row}.navbar-expand-md .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-md .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-md>.container,.navbar-expand-md>.container-fluid,.navbar-expand-md>.container-lg,.navbar-expand-md>.container-md,.navbar-expand-md>.container-sm,.navbar-expand-md>.container-xl{flex-wrap:nowrap}.navbar-expand-md .navbar-nav-scroll{overflow:visible}.navbar-expand-md .navbar-collapse{display:flex!important;flex-basis:auto}.navbar-expand-md .navbar-toggler{display:none}}@media (max-width:959.98px){.navbar-expand-lg>.container,.navbar-expand-lg>.container-fluid,.navbar-expand-lg>.container-lg,.navbar-expand-lg>.container-md,.navbar-expand-lg>.container-sm,.navbar-expand-lg>.container-xl{padding-right:0;padding-left:0}}@media (min-width:960px){.navbar-expand-lg{flex-flow:row nowrap;justify-content:flex-start}.navbar-expand-lg .navbar-nav{flex-direction:row}.navbar-expand-lg .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-lg .navbar-nav 
.nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-lg>.container,.navbar-expand-lg>.container-fluid,.navbar-expand-lg>.container-lg,.navbar-expand-lg>.container-md,.navbar-expand-lg>.container-sm,.navbar-expand-lg>.container-xl{flex-wrap:nowrap}.navbar-expand-lg .navbar-nav-scroll{overflow:visible}.navbar-expand-lg .navbar-collapse{display:flex!important;flex-basis:auto}.navbar-expand-lg .navbar-toggler{display:none}}@media (max-width:1199.98px){.navbar-expand-xl>.container,.navbar-expand-xl>.container-fluid,.navbar-expand-xl>.container-lg,.navbar-expand-xl>.container-md,.navbar-expand-xl>.container-sm,.navbar-expand-xl>.container-xl{padding-right:0;padding-left:0}}@media (min-width:1200px){.navbar-expand-xl{flex-flow:row nowrap;justify-content:flex-start}.navbar-expand-xl .navbar-nav{flex-direction:row}.navbar-expand-xl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xl .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-xl>.container,.navbar-expand-xl>.container-fluid,.navbar-expand-xl>.container-lg,.navbar-expand-xl>.container-md,.navbar-expand-xl>.container-sm,.navbar-expand-xl>.container-xl{flex-wrap:nowrap}.navbar-expand-xl .navbar-nav-scroll{overflow:visible}.navbar-expand-xl .navbar-collapse{display:flex!important;flex-basis:auto}.navbar-expand-xl .navbar-toggler{display:none}}.navbar-expand{flex-flow:row nowrap;justify-content:flex-start}.navbar-expand>.container,.navbar-expand>.container-fluid,.navbar-expand>.container-lg,.navbar-expand>.container-md,.navbar-expand>.container-sm,.navbar-expand>.container-xl{padding-right:0;padding-left:0}.navbar-expand .navbar-nav{flex-direction:row}.navbar-expand .navbar-nav .dropdown-menu{position:absolute}.navbar-expand .navbar-nav 
.nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand>.container,.navbar-expand>.container-fluid,.navbar-expand>.container-lg,.navbar-expand>.container-md,.navbar-expand>.container-sm,.navbar-expand>.container-xl{flex-wrap:nowrap}.navbar-expand .navbar-nav-scroll{overflow:visible}.navbar-expand .navbar-collapse{display:flex!important;flex-basis:auto}.navbar-expand .navbar-toggler{display:none}.navbar-light .navbar-brand,.navbar-light .navbar-brand:focus,.navbar-light .navbar-brand:hover{color:rgba(0,0,0,.9)}.navbar-light .navbar-nav .nav-link{color:rgba(0,0,0,.5)}.navbar-light .navbar-nav .nav-link:focus,.navbar-light .navbar-nav .nav-link:hover{color:rgba(0,0,0,.7)}.navbar-light .navbar-nav .nav-link.disabled{color:rgba(0,0,0,.3)}.navbar-light .navbar-nav .active>.nav-link,.navbar-light .navbar-nav .nav-link.active,.navbar-light .navbar-nav .nav-link.show,.navbar-light .navbar-nav .show>.nav-link{color:rgba(0,0,0,.9)}.navbar-light .navbar-toggler{color:rgba(0,0,0,.5);border-color:rgba(0,0,0,.1)}.navbar-light .navbar-toggler-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='30' height='30'%3E%3Cpath stroke='rgba(0,0,0,0.5)' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3E%3C/svg%3E")}.navbar-light .navbar-text{color:rgba(0,0,0,.5)}.navbar-light .navbar-text a,.navbar-light .navbar-text a:focus,.navbar-light .navbar-text a:hover{color:rgba(0,0,0,.9)}.navbar-dark .navbar-brand,.navbar-dark .navbar-brand:focus,.navbar-dark .navbar-brand:hover{color:#fff}.navbar-dark .navbar-nav .nav-link{color:hsla(0,0%,100%,.5)}.navbar-dark .navbar-nav .nav-link:focus,.navbar-dark .navbar-nav .nav-link:hover{color:hsla(0,0%,100%,.75)}.navbar-dark .navbar-nav .nav-link.disabled{color:hsla(0,0%,100%,.25)}.navbar-dark .navbar-nav .active>.nav-link,.navbar-dark .navbar-nav .nav-link.active,.navbar-dark .navbar-nav .nav-link.show,.navbar-dark .navbar-nav 
.show>.nav-link{color:#fff}.navbar-dark .navbar-toggler{color:hsla(0,0%,100%,.5);border-color:hsla(0,0%,100%,.1)}.navbar-dark .navbar-toggler-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='30' height='30'%3E%3Cpath stroke='rgba(255,255,255,0.5)' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3E%3C/svg%3E")}.navbar-dark .navbar-text{color:hsla(0,0%,100%,.5)}.navbar-dark .navbar-text a,.navbar-dark .navbar-text a:focus,.navbar-dark .navbar-text a:hover{color:#fff}.card{position:relative;display:flex;flex-direction:column;min-width:0;word-wrap:break-word;background-color:#fff;background-clip:border-box;border:1px solid rgba(0,0,0,.125);border-radius:.25rem}.card>hr{margin-right:0;margin-left:0}.card>.list-group{border-top:inherit;border-bottom:inherit}.card>.list-group:first-child{border-top-width:0;border-top-left-radius:calc(.25rem - 1px);border-top-right-radius:calc(.25rem - 1px)}.card>.list-group:last-child{border-bottom-width:0;border-bottom-right-radius:calc(.25rem - 1px);border-bottom-left-radius:calc(.25rem - 1px)}.card>.card-header+.list-group,.card>.list-group+.card-footer{border-top:0}.card-body{flex:1 1 auto;min-height:1px;padding:1.25rem}.card-title{margin-bottom:.75rem}.card-subtitle{margin-top:-.375rem}.card-subtitle,.card-text:last-child{margin-bottom:0}.card-link:hover{text-decoration:none}.card-link+.card-link{margin-left:1.25rem}.card-header{padding:.75rem 1.25rem;margin-bottom:0;background-color:rgba(0,0,0,.03);border-bottom:1px solid rgba(0,0,0,.125)}.card-header:first-child{border-radius:calc(.25rem - 1px) calc(.25rem - 1px) 0 0}.card-footer{padding:.75rem 1.25rem;background-color:rgba(0,0,0,.03);border-top:1px solid rgba(0,0,0,.125)}.card-footer:last-child{border-radius:0 0 calc(.25rem - 1px) calc(.25rem - 
1px)}.card-header-tabs{margin-bottom:-.75rem;border-bottom:0}.card-header-pills,.card-header-tabs{margin-right:-.625rem;margin-left:-.625rem}.card-img-overlay{position:absolute;top:0;right:0;bottom:0;left:0;padding:1.25rem;border-radius:calc(.25rem - 1px)}.card-img,.card-img-bottom,.card-img-top{flex-shrink:0;width:100%}.card-img,.card-img-top{border-top-left-radius:calc(.25rem - 1px);border-top-right-radius:calc(.25rem - 1px)}.card-img,.card-img-bottom{border-bottom-right-radius:calc(.25rem - 1px);border-bottom-left-radius:calc(.25rem - 1px)}.card-deck .card{margin-bottom:15px}@media (min-width:540px){.card-deck{display:flex;flex-flow:row wrap;margin-right:-15px;margin-left:-15px}.card-deck .card{flex:1 0 0%;margin-right:15px;margin-bottom:0;margin-left:15px}}.card-group>.card{margin-bottom:15px}@media (min-width:540px){.card-group{display:flex;flex-flow:row wrap}.card-group>.card{flex:1 0 0%;margin-bottom:0}.card-group>.card+.card{margin-left:0;border-left:0}.card-group>.card:not(:last-child){border-top-right-radius:0;border-bottom-right-radius:0}.card-group>.card:not(:last-child) .card-header,.card-group>.card:not(:last-child) .card-img-top{border-top-right-radius:0}.card-group>.card:not(:last-child) .card-footer,.card-group>.card:not(:last-child) .card-img-bottom{border-bottom-right-radius:0}.card-group>.card:not(:first-child){border-top-left-radius:0;border-bottom-left-radius:0}.card-group>.card:not(:first-child) .card-header,.card-group>.card:not(:first-child) .card-img-top{border-top-left-radius:0}.card-group>.card:not(:first-child) .card-footer,.card-group>.card:not(:first-child) .card-img-bottom{border-bottom-left-radius:0}}.card-columns .card{margin-bottom:.75rem}@media (min-width:540px){.card-columns{column-count:3;column-gap:1.25rem;orphans:1;widows:1}.card-columns 
.card{display:inline-block;width:100%}}.accordion{overflow-anchor:none}.accordion>.card{overflow:hidden}.accordion>.card:not(:last-of-type){border-bottom:0;border-bottom-right-radius:0;border-bottom-left-radius:0}.accordion>.card:not(:first-of-type){border-top-left-radius:0;border-top-right-radius:0}.accordion>.card>.card-header{border-radius:0;margin-bottom:-1px}.breadcrumb{display:flex;flex-wrap:wrap;padding:.75rem 1rem;margin-bottom:1rem;list-style:none;background-color:#e9ecef;border-radius:.25rem}.breadcrumb-item+.breadcrumb-item{padding-left:.5rem}.breadcrumb-item+.breadcrumb-item:before{float:left;padding-right:.5rem;color:#6c757d;content:"/"}.breadcrumb-item+.breadcrumb-item:hover:before{text-decoration:underline;text-decoration:none}.breadcrumb-item.active{color:#6c757d}.pagination{display:flex;padding-left:0;list-style:none;border-radius:.25rem}.page-link{position:relative;display:block;padding:.5rem .75rem;margin-left:-1px;line-height:1.25;color:#007bff;background-color:#fff;border:1px solid #dee2e6}.page-link:hover{z-index:2;color:#0056b3;text-decoration:none;background-color:#e9ecef;border-color:#dee2e6}.page-link:focus{z-index:3;outline:0;box-shadow:0 0 0 .2rem rgba(0,123,255,.25)}.page-item:first-child .page-link{margin-left:0;border-top-left-radius:.25rem;border-bottom-left-radius:.25rem}.page-item:last-child .page-link{border-top-right-radius:.25rem;border-bottom-right-radius:.25rem}.page-item.active .page-link{z-index:3;color:#fff;background-color:#007bff;border-color:#007bff}.page-item.disabled .page-link{color:#6c757d;pointer-events:none;cursor:auto;background-color:#fff;border-color:#dee2e6}.pagination-lg .page-link{padding:.75rem 1.5rem;font-size:1.25rem;line-height:1.5}.pagination-lg .page-item:first-child .page-link{border-top-left-radius:.3rem;border-bottom-left-radius:.3rem}.pagination-lg .page-item:last-child .page-link{border-top-right-radius:.3rem;border-bottom-right-radius:.3rem}.pagination-sm .page-link{padding:.25rem 
.5rem;font-size:.875rem;line-height:1.5}.pagination-sm .page-item:first-child .page-link{border-top-left-radius:.2rem;border-bottom-left-radius:.2rem}.pagination-sm .page-item:last-child .page-link{border-top-right-radius:.2rem;border-bottom-right-radius:.2rem}.badge{display:inline-block;padding:.25em .4em;font-size:75%;font-weight:700;line-height:1;text-align:center;white-space:nowrap;vertical-align:baseline;border-radius:.25rem;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media (prefers-reduced-motion:reduce){.badge{transition:none}}a.badge:focus,a.badge:hover{text-decoration:none}.badge:empty{display:none}.btn .badge{position:relative;top:-1px}.badge-pill{padding-right:.6em;padding-left:.6em;border-radius:10rem}.badge-primary{color:#fff;background-color:#007bff}a.badge-primary:focus,a.badge-primary:hover{color:#fff;background-color:#0062cc}a.badge-primary.focus,a.badge-primary:focus{outline:0;box-shadow:0 0 0 .2rem rgba(0,123,255,.5)}.badge-secondary{color:#fff;background-color:#6c757d}a.badge-secondary:focus,a.badge-secondary:hover{color:#fff;background-color:#545b62}a.badge-secondary.focus,a.badge-secondary:focus{outline:0;box-shadow:0 0 0 .2rem rgba(108,117,125,.5)}.badge-success{color:#fff;background-color:#28a745}a.badge-success:focus,a.badge-success:hover{color:#fff;background-color:#1e7e34}a.badge-success.focus,a.badge-success:focus{outline:0;box-shadow:0 0 0 .2rem rgba(40,167,69,.5)}.badge-info{color:#fff;background-color:#17a2b8}a.badge-info:focus,a.badge-info:hover{color:#fff;background-color:#117a8b}a.badge-info.focus,a.badge-info:focus{outline:0;box-shadow:0 0 0 .2rem rgba(23,162,184,.5)}.badge-warning{color:#212529;background-color:#ffc107}a.badge-warning:focus,a.badge-warning:hover{color:#212529;background-color:#d39e00}a.badge-warning.focus,a.badge-warning:focus{outline:0;box-shadow:0 0 0 .2rem 
rgba(255,193,7,.5)}.badge-danger{color:#fff;background-color:#dc3545}a.badge-danger:focus,a.badge-danger:hover{color:#fff;background-color:#bd2130}a.badge-danger.focus,a.badge-danger:focus{outline:0;box-shadow:0 0 0 .2rem rgba(220,53,69,.5)}.badge-light{color:#212529;background-color:#f8f9fa}a.badge-light:focus,a.badge-light:hover{color:#212529;background-color:#dae0e5}a.badge-light.focus,a.badge-light:focus{outline:0;box-shadow:0 0 0 .2rem rgba(248,249,250,.5)}.badge-dark{color:#fff;background-color:#343a40}a.badge-dark:focus,a.badge-dark:hover{color:#fff;background-color:#1d2124}a.badge-dark.focus,a.badge-dark:focus{outline:0;box-shadow:0 0 0 .2rem rgba(52,58,64,.5)}.jumbotron{padding:2rem 1rem;margin-bottom:2rem;background-color:#e9ecef;border-radius:.3rem}@media (min-width:540px){.jumbotron{padding:4rem 2rem}}.jumbotron-fluid{padding-right:0;padding-left:0;border-radius:0}.alert{position:relative;padding:.75rem 1.25rem;margin-bottom:1rem;border:1px solid transparent;border-radius:.25rem}.alert-heading{color:inherit}.alert-link{font-weight:700}.alert-dismissible{padding-right:4rem}.alert-dismissible .close{position:absolute;top:0;right:0;z-index:2;padding:.75rem 1.25rem;color:inherit}.alert-primary{color:#004085;background-color:#cce5ff;border-color:#b8daff}.alert-primary hr{border-top-color:#9fcdff}.alert-primary .alert-link{color:#002752}.alert-secondary{color:#383d41;background-color:#e2e3e5;border-color:#d6d8db}.alert-secondary hr{border-top-color:#c8cbcf}.alert-secondary .alert-link{color:#202326}.alert-success{color:#155724;background-color:#d4edda;border-color:#c3e6cb}.alert-success hr{border-top-color:#b1dfbb}.alert-success .alert-link{color:#0b2e13}.alert-info{color:#0c5460;background-color:#d1ecf1;border-color:#bee5eb}.alert-info hr{border-top-color:#abdde5}.alert-info .alert-link{color:#062c33}.alert-warning{color:#856404;background-color:#fff3cd;border-color:#ffeeba}.alert-warning hr{border-top-color:#ffe8a1}.alert-warning 
.alert-link{color:#533f03}.alert-danger{color:#721c24;background-color:#f8d7da;border-color:#f5c6cb}.alert-danger hr{border-top-color:#f1b0b7}.alert-danger .alert-link{color:#491217}.alert-light{color:#818182;background-color:#fefefe;border-color:#fdfdfe}.alert-light hr{border-top-color:#ececf6}.alert-light .alert-link{color:#686868}.alert-dark{color:#1b1e21;background-color:#d6d8d9;border-color:#c6c8ca}.alert-dark hr{border-top-color:#b9bbbe}.alert-dark .alert-link{color:#040505}@keyframes progress-bar-stripes{0%{background-position:1rem 0}to{background-position:0 0}}.progress{height:1rem;line-height:0;font-size:.75rem;background-color:#e9ecef;border-radius:.25rem}.progress,.progress-bar{display:flex;overflow:hidden}.progress-bar{flex-direction:column;justify-content:center;color:#fff;text-align:center;white-space:nowrap;background-color:#007bff;transition:width .6s ease}@media (prefers-reduced-motion:reduce){.progress-bar{transition:none}}.progress-bar-striped{background-image:linear-gradient(45deg,hsla(0,0%,100%,.15) 25%,transparent 0,transparent 50%,hsla(0,0%,100%,.15) 0,hsla(0,0%,100%,.15) 75%,transparent 0,transparent);background-size:1rem 1rem}.progress-bar-animated{animation:progress-bar-stripes 1s linear infinite}@media (prefers-reduced-motion:reduce){.progress-bar-animated{animation:none}}.media{display:flex;align-items:flex-start}.media-body{flex:1}.list-group{display:flex;flex-direction:column;padding-left:0;margin-bottom:0;border-radius:.25rem}.list-group-item-action{width:100%;color:#495057;text-align:inherit}.list-group-item-action:focus,.list-group-item-action:hover{z-index:1;color:#495057;text-decoration:none;background-color:#f8f9fa}.list-group-item-action:active{color:#212529;background-color:#e9ecef}.list-group-item{position:relative;display:block;padding:.75rem 1.25rem;background-color:#fff;border:1px solid 
rgba(0,0,0,.125)}.list-group-item:first-child{border-top-left-radius:inherit;border-top-right-radius:inherit}.list-group-item:last-child{border-bottom-right-radius:inherit;border-bottom-left-radius:inherit}.list-group-item.disabled,.list-group-item:disabled{color:#6c757d;pointer-events:none;background-color:#fff}.list-group-item.active{z-index:2;color:#fff;background-color:#007bff;border-color:#007bff}.list-group-item+.list-group-item{border-top-width:0}.list-group-item+.list-group-item.active{margin-top:-1px;border-top-width:1px}.list-group-horizontal{flex-direction:row}.list-group-horizontal>.list-group-item:first-child{border-bottom-left-radius:.25rem;border-top-right-radius:0}.list-group-horizontal>.list-group-item:last-child{border-top-right-radius:.25rem;border-bottom-left-radius:0}.list-group-horizontal>.list-group-item.active{margin-top:0}.list-group-horizontal>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}@media (min-width:540px){.list-group-horizontal-sm{flex-direction:row}.list-group-horizontal-sm>.list-group-item:first-child{border-bottom-left-radius:.25rem;border-top-right-radius:0}.list-group-horizontal-sm>.list-group-item:last-child{border-top-right-radius:.25rem;border-bottom-left-radius:0}.list-group-horizontal-sm>.list-group-item.active{margin-top:0}.list-group-horizontal-sm>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-sm>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media 
(min-width:720px){.list-group-horizontal-md{flex-direction:row}.list-group-horizontal-md>.list-group-item:first-child{border-bottom-left-radius:.25rem;border-top-right-radius:0}.list-group-horizontal-md>.list-group-item:last-child{border-top-right-radius:.25rem;border-bottom-left-radius:0}.list-group-horizontal-md>.list-group-item.active{margin-top:0}.list-group-horizontal-md>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-md>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media (min-width:960px){.list-group-horizontal-lg{flex-direction:row}.list-group-horizontal-lg>.list-group-item:first-child{border-bottom-left-radius:.25rem;border-top-right-radius:0}.list-group-horizontal-lg>.list-group-item:last-child{border-top-right-radius:.25rem;border-bottom-left-radius:0}.list-group-horizontal-lg>.list-group-item.active{margin-top:0}.list-group-horizontal-lg>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-lg>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media (min-width:1200px){.list-group-horizontal-xl{flex-direction:row}.list-group-horizontal-xl>.list-group-item:first-child{border-bottom-left-radius:.25rem;border-top-right-radius:0}.list-group-horizontal-xl>.list-group-item:last-child{border-top-right-radius:.25rem;border-bottom-left-radius:0}.list-group-horizontal-xl>.list-group-item.active{margin-top:0}.list-group-horizontal-xl>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-xl>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}.list-group-flush{border-radius:0}.list-group-flush>.list-group-item{border-width:0 0 
1px}.list-group-flush>.list-group-item:last-child{border-bottom-width:0}.list-group-item-primary{color:#004085;background-color:#b8daff}.list-group-item-primary.list-group-item-action:focus,.list-group-item-primary.list-group-item-action:hover{color:#004085;background-color:#9fcdff}.list-group-item-primary.list-group-item-action.active{color:#fff;background-color:#004085;border-color:#004085}.list-group-item-secondary{color:#383d41;background-color:#d6d8db}.list-group-item-secondary.list-group-item-action:focus,.list-group-item-secondary.list-group-item-action:hover{color:#383d41;background-color:#c8cbcf}.list-group-item-secondary.list-group-item-action.active{color:#fff;background-color:#383d41;border-color:#383d41}.list-group-item-success{color:#155724;background-color:#c3e6cb}.list-group-item-success.list-group-item-action:focus,.list-group-item-success.list-group-item-action:hover{color:#155724;background-color:#b1dfbb}.list-group-item-success.list-group-item-action.active{color:#fff;background-color:#155724;border-color:#155724}.list-group-item-info{color:#0c5460;background-color:#bee5eb}.list-group-item-info.list-group-item-action:focus,.list-group-item-info.list-group-item-action:hover{color:#0c5460;background-color:#abdde5}.list-group-item-info.list-group-item-action.active{color:#fff;background-color:#0c5460;border-color:#0c5460}.list-group-item-warning{color:#856404;background-color:#ffeeba}.list-group-item-warning.list-group-item-action:focus,.list-group-item-warning.list-group-item-action:hover{color:#856404;background-color:#ffe8a1}.list-group-item-warning.list-group-item-action.active{color:#fff;background-color:#856404;border-color:#856404}.list-group-item-danger{color:#721c24;background-color:#f5c6cb}.list-group-item-danger.list-group-item-action:focus,.list-group-item-danger.list-group-item-action:hover{color:#721c24;background-color:#f1b0b7}.list-group-item-danger.list-group-item-action.active{color:#fff;background-color:#721c24;border-color:#721c2
4}.list-group-item-light{color:#818182;background-color:#fdfdfe}.list-group-item-light.list-group-item-action:focus,.list-group-item-light.list-group-item-action:hover{color:#818182;background-color:#ececf6}.list-group-item-light.list-group-item-action.active{color:#fff;background-color:#818182;border-color:#818182}.list-group-item-dark{color:#1b1e21;background-color:#c6c8ca}.list-group-item-dark.list-group-item-action:focus,.list-group-item-dark.list-group-item-action:hover{color:#1b1e21;background-color:#b9bbbe}.list-group-item-dark.list-group-item-action.active{color:#fff;background-color:#1b1e21;border-color:#1b1e21}.close{float:right;font-size:1.5rem;font-weight:700;line-height:1;color:#000;text-shadow:0 1px 0 #fff;opacity:.5}.close:hover{color:#000;text-decoration:none}.close:not(:disabled):not(.disabled):focus,.close:not(:disabled):not(.disabled):hover{opacity:.75}button.close{padding:0;background-color:transparent;border:0}a.close.disabled{pointer-events:none}.toast{flex-basis:350px;max-width:350px;font-size:.875rem;background-color:hsla(0,0%,100%,.85);background-clip:padding-box;border:1px solid rgba(0,0,0,.1);box-shadow:0 .25rem .75rem rgba(0,0,0,.1);opacity:0;border-radius:.25rem}.toast:not(:last-child){margin-bottom:.75rem}.toast.showing{opacity:1}.toast.show{display:block;opacity:1}.toast.hide{display:none}.toast-header{display:flex;align-items:center;padding:.25rem .75rem;color:#6c757d;background-color:hsla(0,0%,100%,.85);background-clip:padding-box;border-bottom:1px solid rgba(0,0,0,.05);border-top-left-radius:calc(.25rem - 1px);border-top-right-radius:calc(.25rem - 1px)}.toast-body{padding:.75rem}.modal-open{overflow:hidden}.modal-open .modal{overflow-x:hidden;overflow-y:auto}.modal{position:fixed;top:0;left:0;z-index:1050;display:none;width:100%;height:100%;overflow:hidden;outline:0}.modal-dialog{position:relative;width:auto;margin:.5rem;pointer-events:none}.modal.fade .modal-dialog{transition:transform .3s 
ease-out;transform:translateY(-50px)}@media (prefers-reduced-motion:reduce){.modal.fade .modal-dialog{transition:none}}.modal.show .modal-dialog{transform:none}.modal.modal-static .modal-dialog{transform:scale(1.02)}.modal-dialog-scrollable{display:flex;max-height:calc(100% - 1rem)}.modal-dialog-scrollable .modal-content{max-height:calc(100vh - 1rem);overflow:hidden}.modal-dialog-scrollable .modal-footer,.modal-dialog-scrollable .modal-header{flex-shrink:0}.modal-dialog-scrollable .modal-body{overflow-y:auto}.modal-dialog-centered{display:flex;align-items:center;min-height:calc(100% - 1rem)}.modal-dialog-centered:before{display:block;height:calc(100vh - 1rem);height:min-content;content:""}.modal-dialog-centered.modal-dialog-scrollable{flex-direction:column;justify-content:center;height:100%}.modal-dialog-centered.modal-dialog-scrollable .modal-content{max-height:none}.modal-dialog-centered.modal-dialog-scrollable:before{content:none}.modal-content{position:relative;display:flex;flex-direction:column;width:100%;pointer-events:auto;background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,0,0,.2);border-radius:.3rem;outline:0}.modal-backdrop{position:fixed;top:0;left:0;z-index:1040;width:100vw;height:100vh;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop.show{opacity:.5}.modal-header{display:flex;align-items:flex-start;justify-content:space-between;padding:1rem;border-bottom:1px solid #dee2e6;border-top-left-radius:calc(.3rem - 1px);border-top-right-radius:calc(.3rem - 1px)}.modal-header .close{padding:1rem;margin:-1rem -1rem -1rem auto}.modal-title{margin-bottom:0;line-height:1.5}.modal-body{position:relative;flex:1 1 auto;padding:1rem}.modal-footer{display:flex;flex-wrap:wrap;align-items:center;justify-content:flex-end;padding:.75rem;border-top:1px solid #dee2e6;border-bottom-right-radius:calc(.3rem - 1px);border-bottom-left-radius:calc(.3rem - 
1px)}.modal-footer>*{margin:.25rem}.modal-scrollbar-measure{position:absolute;top:-9999px;width:50px;height:50px;overflow:scroll}@media (min-width:540px){.modal-dialog{max-width:500px;margin:1.75rem auto}.modal-dialog-scrollable{max-height:calc(100% - 3.5rem)}.modal-dialog-scrollable .modal-content{max-height:calc(100vh - 3.5rem)}.modal-dialog-centered{min-height:calc(100% - 3.5rem)}.modal-dialog-centered:before{height:calc(100vh - 3.5rem);height:min-content}.modal-sm{max-width:300px}}@media (min-width:960px){.modal-lg,.modal-xl{max-width:800px}}@media (min-width:1200px){.modal-xl{max-width:1140px}}.tooltip{position:absolute;z-index:1070;display:block;margin:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,Liberation Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;word-spacing:normal;white-space:normal;line-break:auto;font-size:.875rem;word-wrap:break-word;opacity:0}.tooltip.show{opacity:.9}.tooltip .arrow{position:absolute;display:block;width:.8rem;height:.4rem}.tooltip .arrow:before{position:absolute;content:"";border-color:transparent;border-style:solid}.bs-tooltip-auto[x-placement^=top],.bs-tooltip-top{padding:.4rem 0}.bs-tooltip-auto[x-placement^=top] .arrow,.bs-tooltip-top .arrow{bottom:0}.bs-tooltip-auto[x-placement^=top] .arrow:before,.bs-tooltip-top .arrow:before{top:0;border-width:.4rem .4rem 0;border-top-color:#000}.bs-tooltip-auto[x-placement^=right],.bs-tooltip-right{padding:0 .4rem}.bs-tooltip-auto[x-placement^=right] .arrow,.bs-tooltip-right .arrow{left:0;width:.4rem;height:.8rem}.bs-tooltip-auto[x-placement^=right] .arrow:before,.bs-tooltip-right .arrow:before{right:0;border-width:.4rem .4rem .4rem 0;border-right-color:#000}.bs-tooltip-auto[x-placement^=bottom],.bs-tooltip-bottom{padding:.4rem 
0}.bs-tooltip-auto[x-placement^=bottom] .arrow,.bs-tooltip-bottom .arrow{top:0}.bs-tooltip-auto[x-placement^=bottom] .arrow:before,.bs-tooltip-bottom .arrow:before{bottom:0;border-width:0 .4rem .4rem;border-bottom-color:#000}.bs-tooltip-auto[x-placement^=left],.bs-tooltip-left{padding:0 .4rem}.bs-tooltip-auto[x-placement^=left] .arrow,.bs-tooltip-left .arrow{right:0;width:.4rem;height:.8rem}.bs-tooltip-auto[x-placement^=left] .arrow:before,.bs-tooltip-left .arrow:before{left:0;border-width:.4rem 0 .4rem .4rem;border-left-color:#000}.tooltip-inner{max-width:200px;padding:.25rem .5rem;color:#fff;text-align:center;background-color:#000;border-radius:.25rem}.popover{top:0;left:0;z-index:1060;max-width:276px;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,Liberation Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-style:normal;font-weight:400;line-height:1.5;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;word-spacing:normal;white-space:normal;line-break:auto;font-size:.875rem;word-wrap:break-word;background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,0,0,.2);border-radius:.3rem}.popover,.popover .arrow{position:absolute;display:block}.popover .arrow{width:1rem;height:.5rem;margin:0 .3rem}.popover .arrow:after,.popover .arrow:before{position:absolute;display:block;content:"";border-color:transparent;border-style:solid}.bs-popover-auto[x-placement^=top],.bs-popover-top{margin-bottom:.5rem}.bs-popover-auto[x-placement^=top]>.arrow,.bs-popover-top>.arrow{bottom:calc(-.5rem - 1px)}.bs-popover-auto[x-placement^=top]>.arrow:before,.bs-popover-top>.arrow:before{bottom:0;border-width:.5rem .5rem 0;border-top-color:rgba(0,0,0,.25)}.bs-popover-auto[x-placement^=top]>.arrow:after,.bs-popover-top>.arrow:after{bottom:1px;border-width:.5rem .5rem 
0;border-top-color:#fff}.bs-popover-auto[x-placement^=right],.bs-popover-right{margin-left:.5rem}.bs-popover-auto[x-placement^=right]>.arrow,.bs-popover-right>.arrow{left:calc(-.5rem - 1px);width:.5rem;height:1rem;margin:.3rem 0}.bs-popover-auto[x-placement^=right]>.arrow:before,.bs-popover-right>.arrow:before{left:0;border-width:.5rem .5rem .5rem 0;border-right-color:rgba(0,0,0,.25)}.bs-popover-auto[x-placement^=right]>.arrow:after,.bs-popover-right>.arrow:after{left:1px;border-width:.5rem .5rem .5rem 0;border-right-color:#fff}.bs-popover-auto[x-placement^=bottom],.bs-popover-bottom{margin-top:.5rem}.bs-popover-auto[x-placement^=bottom]>.arrow,.bs-popover-bottom>.arrow{top:calc(-.5rem - 1px)}.bs-popover-auto[x-placement^=bottom]>.arrow:before,.bs-popover-bottom>.arrow:before{top:0;border-width:0 .5rem .5rem;border-bottom-color:rgba(0,0,0,.25)}.bs-popover-auto[x-placement^=bottom]>.arrow:after,.bs-popover-bottom>.arrow:after{top:1px;border-width:0 .5rem .5rem;border-bottom-color:#fff}.bs-popover-auto[x-placement^=bottom] .popover-header:before,.bs-popover-bottom .popover-header:before{position:absolute;top:0;left:50%;display:block;width:1rem;margin-left:-.5rem;content:"";border-bottom:1px solid #f7f7f7}.bs-popover-auto[x-placement^=left],.bs-popover-left{margin-right:.5rem}.bs-popover-auto[x-placement^=left]>.arrow,.bs-popover-left>.arrow{right:calc(-.5rem - 1px);width:.5rem;height:1rem;margin:.3rem 0}.bs-popover-auto[x-placement^=left]>.arrow:before,.bs-popover-left>.arrow:before{right:0;border-width:.5rem 0 .5rem .5rem;border-left-color:rgba(0,0,0,.25)}.bs-popover-auto[x-placement^=left]>.arrow:after,.bs-popover-left>.arrow:after{right:1px;border-width:.5rem 0 .5rem .5rem;border-left-color:#fff}.popover-header{padding:.5rem .75rem;margin-bottom:0;font-size:1rem;background-color:#f7f7f7;border-bottom:1px solid #ebebeb;border-top-left-radius:calc(.3rem - 1px);border-top-right-radius:calc(.3rem - 1px)}.popover-header:empty{display:none}.popover-body{padding:.5rem 
.75rem;color:#212529}.carousel{position:relative}.carousel.pointer-event{touch-action:pan-y}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner:after{display:block;clear:both;content:""}.carousel-item{position:relative;display:none;float:left;width:100%;margin-right:-100%;backface-visibility:hidden;transition:transform .6s ease-in-out}@media (prefers-reduced-motion:reduce){.carousel-item{transition:none}}.carousel-item-next,.carousel-item-prev,.carousel-item.active{display:block}.active.carousel-item-right,.carousel-item-next:not(.carousel-item-left){transform:translateX(100%)}.active.carousel-item-left,.carousel-item-prev:not(.carousel-item-right){transform:translateX(-100%)}.carousel-fade .carousel-item{opacity:0;transition-property:opacity;transform:none}.carousel-fade .carousel-item-next.carousel-item-left,.carousel-fade .carousel-item-prev.carousel-item-right,.carousel-fade .carousel-item.active{z-index:1;opacity:1}.carousel-fade .active.carousel-item-left,.carousel-fade .active.carousel-item-right{z-index:0;opacity:0;transition:opacity 0s .6s}@media (prefers-reduced-motion:reduce){.carousel-fade .active.carousel-item-left,.carousel-fade .active.carousel-item-right{transition:none}}.carousel-control-next,.carousel-control-prev{position:absolute;top:0;bottom:0;z-index:1;display:flex;align-items:center;justify-content:center;width:15%;padding:0;color:#fff;text-align:center;background:none;border:0;opacity:.5;transition:opacity .15s ease}@media (prefers-reduced-motion:reduce){.carousel-control-next,.carousel-control-prev{transition:none}}.carousel-control-next:focus,.carousel-control-next:hover,.carousel-control-prev:focus,.carousel-control-prev:hover{color:#fff;text-decoration:none;outline:0;opacity:.9}.carousel-control-prev{left:0}.carousel-control-next{right:0}.carousel-control-next-icon,.carousel-control-prev-icon{display:inline-block;width:20px;height:20px;background:50%/100% 100% 
no-repeat}.carousel-control-prev-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='%23fff' width='8' height='8'%3E%3Cpath d='M5.25 0l-4 4 4 4 1.5-1.5L4.25 4l2.5-2.5L5.25 0z'/%3E%3C/svg%3E")}.carousel-control-next-icon{background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='%23fff' width='8' height='8'%3E%3Cpath d='M2.75 0l-1.5 1.5L3.75 4l-2.5 2.5L2.75 8l4-4-4-4z'/%3E%3C/svg%3E")}.carousel-indicators{position:absolute;right:0;bottom:0;left:0;z-index:15;display:flex;justify-content:center;padding-left:0;margin-right:15%;margin-left:15%;list-style:none}.carousel-indicators li{box-sizing:content-box;flex:0 1 auto;width:30px;height:3px;margin-right:3px;margin-left:3px;text-indent:-999px;cursor:pointer;background-color:#fff;background-clip:padding-box;border-top:10px solid transparent;border-bottom:10px solid transparent;opacity:.5;transition:opacity .6s ease}@media (prefers-reduced-motion:reduce){.carousel-indicators li{transition:none}}.carousel-indicators .active{opacity:1}.carousel-caption{position:absolute;right:15%;bottom:20px;left:15%;z-index:10;padding-top:20px;padding-bottom:20px;color:#fff;text-align:center}@keyframes spinner-border{to{transform:rotate(1turn)}}.spinner-border{display:inline-block;width:2rem;height:2rem;vertical-align:-.125em;border:.25em solid;border-right:.25em solid transparent;border-radius:50%;animation:spinner-border .75s linear infinite}.spinner-border-sm{width:1rem;height:1rem;border-width:.2em}@keyframes spinner-grow{0%{transform:scale(0)}50%{opacity:1;transform:none}}.spinner-grow{display:inline-block;width:2rem;height:2rem;vertical-align:-.125em;background-color:currentColor;border-radius:50%;opacity:0;animation:spinner-grow .75s linear infinite}.spinner-grow-sm{width:1rem;height:1rem}@media 
(prefers-reduced-motion:reduce){.spinner-border,.spinner-grow{animation-duration:1.5s}}.align-baseline{vertical-align:baseline!important}.align-top{vertical-align:top!important}.align-middle{vertical-align:middle!important}.align-bottom{vertical-align:bottom!important}.align-text-bottom{vertical-align:text-bottom!important}.align-text-top{vertical-align:text-top!important}.bg-primary{background-color:#007bff!important}a.bg-primary:focus,a.bg-primary:hover,button.bg-primary:focus,button.bg-primary:hover{background-color:#0062cc!important}.bg-secondary{background-color:#6c757d!important}a.bg-secondary:focus,a.bg-secondary:hover,button.bg-secondary:focus,button.bg-secondary:hover{background-color:#545b62!important}.bg-success{background-color:#28a745!important}a.bg-success:focus,a.bg-success:hover,button.bg-success:focus,button.bg-success:hover{background-color:#1e7e34!important}.bg-info{background-color:#17a2b8!important}a.bg-info:focus,a.bg-info:hover,button.bg-info:focus,button.bg-info:hover{background-color:#117a8b!important}.bg-warning{background-color:#ffc107!important}a.bg-warning:focus,a.bg-warning:hover,button.bg-warning:focus,button.bg-warning:hover{background-color:#d39e00!important}.bg-danger{background-color:#dc3545!important}a.bg-danger:focus,a.bg-danger:hover,button.bg-danger:focus,button.bg-danger:hover{background-color:#bd2130!important}.bg-light{background-color:#f8f9fa!important}a.bg-light:focus,a.bg-light:hover,button.bg-light:focus,button.bg-light:hover{background-color:#dae0e5!important}.bg-dark{background-color:#343a40!important}a.bg-dark:focus,a.bg-dark:hover,button.bg-dark:focus,button.bg-dark:hover{background-color:#1d2124!important}.bg-white{background-color:#fff!important}.bg-transparent{background-color:transparent!important}.border{border:1px solid #dee2e6!important}.border-top{border-top:1px solid #dee2e6!important}.border-right{border-right:1px solid #dee2e6!important}.border-bottom{border-bottom:1px solid 
#dee2e6!important}.border-left{border-left:1px solid #dee2e6!important}.border-0{border:0!important}.border-top-0{border-top:0!important}.border-right-0{border-right:0!important}.border-bottom-0{border-bottom:0!important}.border-left-0{border-left:0!important}.border-primary{border-color:#007bff!important}.border-secondary{border-color:#6c757d!important}.border-success{border-color:#28a745!important}.border-info{border-color:#17a2b8!important}.border-warning{border-color:#ffc107!important}.border-danger{border-color:#dc3545!important}.border-light{border-color:#f8f9fa!important}.border-dark{border-color:#343a40!important}.border-white{border-color:#fff!important}.rounded-sm{border-radius:.2rem!important}.rounded{border-radius:.25rem!important}.rounded-top{border-top-left-radius:.25rem!important}.rounded-right,.rounded-top{border-top-right-radius:.25rem!important}.rounded-bottom,.rounded-right{border-bottom-right-radius:.25rem!important}.rounded-bottom,.rounded-left{border-bottom-left-radius:.25rem!important}.rounded-left{border-top-left-radius:.25rem!important}.rounded-lg{border-radius:.3rem!important}.rounded-circle{border-radius:50%!important}.rounded-pill{border-radius:50rem!important}.rounded-0{border-radius:0!important}.clearfix:after{display:block;clear:both;content:""}.d-none{display:none!important}.d-inline{display:inline!important}.d-inline-block{display:inline-block!important}.d-block{display:block!important}.d-table{display:table!important}.d-table-row{display:table-row!important}.d-table-cell{display:table-cell!important}.d-flex{display:flex!important}.d-inline-flex{display:inline-flex!important}@media 
(min-width:540px){.d-sm-none{display:none!important}.d-sm-inline{display:inline!important}.d-sm-inline-block{display:inline-block!important}.d-sm-block{display:block!important}.d-sm-table{display:table!important}.d-sm-table-row{display:table-row!important}.d-sm-table-cell{display:table-cell!important}.d-sm-flex{display:flex!important}.d-sm-inline-flex{display:inline-flex!important}}@media (min-width:720px){.d-md-none{display:none!important}.d-md-inline{display:inline!important}.d-md-inline-block{display:inline-block!important}.d-md-block{display:block!important}.d-md-table{display:table!important}.d-md-table-row{display:table-row!important}.d-md-table-cell{display:table-cell!important}.d-md-flex{display:flex!important}.d-md-inline-flex{display:inline-flex!important}}@media (min-width:960px){.d-lg-none{display:none!important}.d-lg-inline{display:inline!important}.d-lg-inline-block{display:inline-block!important}.d-lg-block{display:block!important}.d-lg-table{display:table!important}.d-lg-table-row{display:table-row!important}.d-lg-table-cell{display:table-cell!important}.d-lg-flex{display:flex!important}.d-lg-inline-flex{display:inline-flex!important}}@media (min-width:1200px){.d-xl-none{display:none!important}.d-xl-inline{display:inline!important}.d-xl-inline-block{display:inline-block!important}.d-xl-block{display:block!important}.d-xl-table{display:table!important}.d-xl-table-row{display:table-row!important}.d-xl-table-cell{display:table-cell!important}.d-xl-flex{display:flex!important}.d-xl-inline-flex{display:inline-flex!important}}@media 
print{.d-print-none{display:none!important}.d-print-inline{display:inline!important}.d-print-inline-block{display:inline-block!important}.d-print-block{display:block!important}.d-print-table{display:table!important}.d-print-table-row{display:table-row!important}.d-print-table-cell{display:table-cell!important}.d-print-flex{display:flex!important}.d-print-inline-flex{display:inline-flex!important}}.embed-responsive{position:relative;display:block;width:100%;padding:0;overflow:hidden}.embed-responsive:before{display:block;content:""}.embed-responsive .embed-responsive-item,.embed-responsive embed,.embed-responsive iframe,.embed-responsive object,.embed-responsive video{position:absolute;top:0;bottom:0;left:0;width:100%;height:100%;border:0}.embed-responsive-21by9:before{padding-top:42.85714%}.embed-responsive-16by9:before{padding-top:56.25%}.embed-responsive-4by3:before{padding-top:75%}.embed-responsive-1by1:before{padding-top:100%}.flex-row{flex-direction:row!important}.flex-column{flex-direction:column!important}.flex-row-reverse{flex-direction:row-reverse!important}.flex-column-reverse{flex-direction:column-reverse!important}.flex-wrap{flex-wrap:wrap!important}.flex-nowrap{flex-wrap:nowrap!important}.flex-wrap-reverse{flex-wrap:wrap-reverse!important}.flex-fill{flex:1 1 
auto!important}.flex-grow-0{flex-grow:0!important}.flex-grow-1{flex-grow:1!important}.flex-shrink-0{flex-shrink:0!important}.flex-shrink-1{flex-shrink:1!important}.justify-content-start{justify-content:flex-start!important}.justify-content-end{justify-content:flex-end!important}.justify-content-center{justify-content:center!important}.justify-content-between{justify-content:space-between!important}.justify-content-around{justify-content:space-around!important}.align-items-start{align-items:flex-start!important}.align-items-end{align-items:flex-end!important}.align-items-center{align-items:center!important}.align-items-baseline{align-items:baseline!important}.align-items-stretch{align-items:stretch!important}.align-content-start{align-content:flex-start!important}.align-content-end{align-content:flex-end!important}.align-content-center{align-content:center!important}.align-content-between{align-content:space-between!important}.align-content-around{align-content:space-around!important}.align-content-stretch{align-content:stretch!important}.align-self-auto{align-self:auto!important}.align-self-start{align-self:flex-start!important}.align-self-end{align-self:flex-end!important}.align-self-center{align-self:center!important}.align-self-baseline{align-self:baseline!important}.align-self-stretch{align-self:stretch!important}@media (min-width:540px){.flex-sm-row{flex-direction:row!important}.flex-sm-column{flex-direction:column!important}.flex-sm-row-reverse{flex-direction:row-reverse!important}.flex-sm-column-reverse{flex-direction:column-reverse!important}.flex-sm-wrap{flex-wrap:wrap!important}.flex-sm-nowrap{flex-wrap:nowrap!important}.flex-sm-wrap-reverse{flex-wrap:wrap-reverse!important}.flex-sm-fill{flex:1 1 
auto!important}.flex-sm-grow-0{flex-grow:0!important}.flex-sm-grow-1{flex-grow:1!important}.flex-sm-shrink-0{flex-shrink:0!important}.flex-sm-shrink-1{flex-shrink:1!important}.justify-content-sm-start{justify-content:flex-start!important}.justify-content-sm-end{justify-content:flex-end!important}.justify-content-sm-center{justify-content:center!important}.justify-content-sm-between{justify-content:space-between!important}.justify-content-sm-around{justify-content:space-around!important}.align-items-sm-start{align-items:flex-start!important}.align-items-sm-end{align-items:flex-end!important}.align-items-sm-center{align-items:center!important}.align-items-sm-baseline{align-items:baseline!important}.align-items-sm-stretch{align-items:stretch!important}.align-content-sm-start{align-content:flex-start!important}.align-content-sm-end{align-content:flex-end!important}.align-content-sm-center{align-content:center!important}.align-content-sm-between{align-content:space-between!important}.align-content-sm-around{align-content:space-around!important}.align-content-sm-stretch{align-content:stretch!important}.align-self-sm-auto{align-self:auto!important}.align-self-sm-start{align-self:flex-start!important}.align-self-sm-end{align-self:flex-end!important}.align-self-sm-center{align-self:center!important}.align-self-sm-baseline{align-self:baseline!important}.align-self-sm-stretch{align-self:stretch!important}}@media (min-width:720px){.flex-md-row{flex-direction:row!important}.flex-md-column{flex-direction:column!important}.flex-md-row-reverse{flex-direction:row-reverse!important}.flex-md-column-reverse{flex-direction:column-reverse!important}.flex-md-wrap{flex-wrap:wrap!important}.flex-md-nowrap{flex-wrap:nowrap!important}.flex-md-wrap-reverse{flex-wrap:wrap-reverse!important}.flex-md-fill{flex:1 1 
auto!important}.flex-md-grow-0{flex-grow:0!important}.flex-md-grow-1{flex-grow:1!important}.flex-md-shrink-0{flex-shrink:0!important}.flex-md-shrink-1{flex-shrink:1!important}.justify-content-md-start{justify-content:flex-start!important}.justify-content-md-end{justify-content:flex-end!important}.justify-content-md-center{justify-content:center!important}.justify-content-md-between{justify-content:space-between!important}.justify-content-md-around{justify-content:space-around!important}.align-items-md-start{align-items:flex-start!important}.align-items-md-end{align-items:flex-end!important}.align-items-md-center{align-items:center!important}.align-items-md-baseline{align-items:baseline!important}.align-items-md-stretch{align-items:stretch!important}.align-content-md-start{align-content:flex-start!important}.align-content-md-end{align-content:flex-end!important}.align-content-md-center{align-content:center!important}.align-content-md-between{align-content:space-between!important}.align-content-md-around{align-content:space-around!important}.align-content-md-stretch{align-content:stretch!important}.align-self-md-auto{align-self:auto!important}.align-self-md-start{align-self:flex-start!important}.align-self-md-end{align-self:flex-end!important}.align-self-md-center{align-self:center!important}.align-self-md-baseline{align-self:baseline!important}.align-self-md-stretch{align-self:stretch!important}}@media (min-width:960px){.flex-lg-row{flex-direction:row!important}.flex-lg-column{flex-direction:column!important}.flex-lg-row-reverse{flex-direction:row-reverse!important}.flex-lg-column-reverse{flex-direction:column-reverse!important}.flex-lg-wrap{flex-wrap:wrap!important}.flex-lg-nowrap{flex-wrap:nowrap!important}.flex-lg-wrap-reverse{flex-wrap:wrap-reverse!important}.flex-lg-fill{flex:1 1 
auto!important}.flex-lg-grow-0{flex-grow:0!important}.flex-lg-grow-1{flex-grow:1!important}.flex-lg-shrink-0{flex-shrink:0!important}.flex-lg-shrink-1{flex-shrink:1!important}.justify-content-lg-start{justify-content:flex-start!important}.justify-content-lg-end{justify-content:flex-end!important}.justify-content-lg-center{justify-content:center!important}.justify-content-lg-between{justify-content:space-between!important}.justify-content-lg-around{justify-content:space-around!important}.align-items-lg-start{align-items:flex-start!important}.align-items-lg-end{align-items:flex-end!important}.align-items-lg-center{align-items:center!important}.align-items-lg-baseline{align-items:baseline!important}.align-items-lg-stretch{align-items:stretch!important}.align-content-lg-start{align-content:flex-start!important}.align-content-lg-end{align-content:flex-end!important}.align-content-lg-center{align-content:center!important}.align-content-lg-between{align-content:space-between!important}.align-content-lg-around{align-content:space-around!important}.align-content-lg-stretch{align-content:stretch!important}.align-self-lg-auto{align-self:auto!important}.align-self-lg-start{align-self:flex-start!important}.align-self-lg-end{align-self:flex-end!important}.align-self-lg-center{align-self:center!important}.align-self-lg-baseline{align-self:baseline!important}.align-self-lg-stretch{align-self:stretch!important}}@media (min-width:1200px){.flex-xl-row{flex-direction:row!important}.flex-xl-column{flex-direction:column!important}.flex-xl-row-reverse{flex-direction:row-reverse!important}.flex-xl-column-reverse{flex-direction:column-reverse!important}.flex-xl-wrap{flex-wrap:wrap!important}.flex-xl-nowrap{flex-wrap:nowrap!important}.flex-xl-wrap-reverse{flex-wrap:wrap-reverse!important}.flex-xl-fill{flex:1 1 
auto!important}.flex-xl-grow-0{flex-grow:0!important}.flex-xl-grow-1{flex-grow:1!important}.flex-xl-shrink-0{flex-shrink:0!important}.flex-xl-shrink-1{flex-shrink:1!important}.justify-content-xl-start{justify-content:flex-start!important}.justify-content-xl-end{justify-content:flex-end!important}.justify-content-xl-center{justify-content:center!important}.justify-content-xl-between{justify-content:space-between!important}.justify-content-xl-around{justify-content:space-around!important}.align-items-xl-start{align-items:flex-start!important}.align-items-xl-end{align-items:flex-end!important}.align-items-xl-center{align-items:center!important}.align-items-xl-baseline{align-items:baseline!important}.align-items-xl-stretch{align-items:stretch!important}.align-content-xl-start{align-content:flex-start!important}.align-content-xl-end{align-content:flex-end!important}.align-content-xl-center{align-content:center!important}.align-content-xl-between{align-content:space-between!important}.align-content-xl-around{align-content:space-around!important}.align-content-xl-stretch{align-content:stretch!important}.align-self-xl-auto{align-self:auto!important}.align-self-xl-start{align-self:flex-start!important}.align-self-xl-end{align-self:flex-end!important}.align-self-xl-center{align-self:center!important}.align-self-xl-baseline{align-self:baseline!important}.align-self-xl-stretch{align-self:stretch!important}}.float-left{float:left!important}.float-right{float:right!important}.float-none{float:none!important}@media (min-width:540px){.float-sm-left{float:left!important}.float-sm-right{float:right!important}.float-sm-none{float:none!important}}@media (min-width:720px){.float-md-left{float:left!important}.float-md-right{float:right!important}.float-md-none{float:none!important}}@media (min-width:960px){.float-lg-left{float:left!important}.float-lg-right{float:right!important}.float-lg-none{float:none!important}}@media 
(min-width:1200px){.float-xl-left{float:left!important}.float-xl-right{float:right!important}.float-xl-none{float:none!important}}.user-select-all{user-select:all!important}.user-select-auto{user-select:auto!important}.user-select-none{user-select:none!important}.overflow-auto{overflow:auto!important}.overflow-hidden{overflow:hidden!important}.position-static{position:static!important}.position-relative{position:relative!important}.position-absolute{position:absolute!important}.position-fixed{position:fixed!important}.position-sticky{position:sticky!important}.fixed-top{top:0}.fixed-bottom,.fixed-top{position:fixed;right:0;left:0;z-index:1030}.fixed-bottom{bottom:0}@supports (position:sticky){.sticky-top{position:sticky;top:0;z-index:1020}}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;overflow:visible;clip:auto;white-space:normal}.shadow-sm{box-shadow:0 .125rem .25rem rgba(0,0,0,.075)!important}.shadow{box-shadow:0 .5rem 1rem rgba(0,0,0,.15)!important}.shadow-lg{box-shadow:0 1rem 3rem 
rgba(0,0,0,.175)!important}.shadow-none{box-shadow:none!important}.w-25{width:25%!important}.w-50{width:50%!important}.w-75{width:75%!important}.w-100{width:100%!important}.w-auto{width:auto!important}.h-25{height:25%!important}.h-50{height:50%!important}.h-75{height:75%!important}.h-100{height:100%!important}.h-auto{height:auto!important}.mw-100{max-width:100%!important}.mh-100{max-height:100%!important}.min-vw-100{min-width:100vw!important}.min-vh-100{min-height:100vh!important}.vw-100{width:100vw!important}.vh-100{height:100vh!important}.m-0{margin:0!important}.mt-0,.my-0{margin-top:0!important}.mr-0,.mx-0{margin-right:0!important}.mb-0,.my-0{margin-bottom:0!important}.ml-0,.mx-0{margin-left:0!important}.m-1{margin:.25rem!important}.mt-1,.my-1{margin-top:.25rem!important}.mr-1,.mx-1{margin-right:.25rem!important}.mb-1,.my-1{margin-bottom:.25rem!important}.ml-1,.mx-1{margin-left:.25rem!important}.m-2{margin:.5rem!important}.mt-2,.my-2{margin-top:.5rem!important}.mr-2,.mx-2{margin-right:.5rem!important}.mb-2,.my-2{margin-bottom:.5rem!important}.ml-2,.mx-2{margin-left:.5rem!important}.m-3{margin:1rem!important}.mt-3,.my-3{margin-top:1rem!important}.mr-3,.mx-3{margin-right:1rem!important}.mb-3,.my-3{margin-bottom:1rem!important}.ml-3,.mx-3{margin-left:1rem!important}.m-4{margin:1.5rem!important}.mt-4,.my-4{margin-top:1.5rem!important}.mr-4,.mx-4{margin-right:1.5rem!important}.mb-4,.my-4{margin-bottom:1.5rem!important}.ml-4,.mx-4{margin-left:1.5rem!important}.m-5{margin:3rem!important}.mt-5,.my-5{margin-top:3rem!important}.mr-5,.mx-5{margin-right:3rem!important}.mb-5,.my-5{margin-bottom:3rem!important}.ml-5,.mx-5{margin-left:3rem!important}.p-0{padding:0!important}.pt-0,.py-0{padding-top:0!important}.pr-0,.px-0{padding-right:0!important}.pb-0,.py-0{padding-bottom:0!important}.pl-0,.px-0{padding-left:0!important}.p-1{padding:.25rem!important}.pt-1,.py-1{padding-top:.25rem!important}.pr-1,.px-1{padding-right:.25rem!important}.pb-1,.py-1{padding-bottom:.25rem!important}.
pl-1,.px-1{padding-left:.25rem!important}.p-2{padding:.5rem!important}.pt-2,.py-2{padding-top:.5rem!important}.pr-2,.px-2{padding-right:.5rem!important}.pb-2,.py-2{padding-bottom:.5rem!important}.pl-2,.px-2{padding-left:.5rem!important}.p-3{padding:1rem!important}.pt-3,.py-3{padding-top:1rem!important}.pr-3,.px-3{padding-right:1rem!important}.pb-3,.py-3{padding-bottom:1rem!important}.pl-3,.px-3{padding-left:1rem!important}.p-4{padding:1.5rem!important}.pt-4,.py-4{padding-top:1.5rem!important}.pr-4,.px-4{padding-right:1.5rem!important}.pb-4,.py-4{padding-bottom:1.5rem!important}.pl-4,.px-4{padding-left:1.5rem!important}.p-5{padding:3rem!important}.pt-5,.py-5{padding-top:3rem!important}.pr-5,.px-5{padding-right:3rem!important}.pb-5,.py-5{padding-bottom:3rem!important}.pl-5,.px-5{padding-left:3rem!important}.m-n1{margin:-.25rem!important}.mt-n1,.my-n1{margin-top:-.25rem!important}.mr-n1,.mx-n1{margin-right:-.25rem!important}.mb-n1,.my-n1{margin-bottom:-.25rem!important}.ml-n1,.mx-n1{margin-left:-.25rem!important}.m-n2{margin:-.5rem!important}.mt-n2,.my-n2{margin-top:-.5rem!important}.mr-n2,.mx-n2{margin-right:-.5rem!important}.mb-n2,.my-n2{margin-bottom:-.5rem!important}.ml-n2,.mx-n2{margin-left:-.5rem!important}.m-n3{margin:-1rem!important}.mt-n3,.my-n3{margin-top:-1rem!important}.mr-n3,.mx-n3{margin-right:-1rem!important}.mb-n3,.my-n3{margin-bottom:-1rem!important}.ml-n3,.mx-n3{margin-left:-1rem!important}.m-n4{margin:-1.5rem!important}.mt-n4,.my-n4{margin-top:-1.5rem!important}.mr-n4,.mx-n4{margin-right:-1.5rem!important}.mb-n4,.my-n4{margin-bottom:-1.5rem!important}.ml-n4,.mx-n4{margin-left:-1.5rem!important}.m-n5{margin:-3rem!important}.mt-n5,.my-n5{margin-top:-3rem!important}.mr-n5,.mx-n5{margin-right:-3rem!important}.mb-n5,.my-n5{margin-bottom:-3rem!important}.ml-n5,.mx-n5{margin-left:-3rem!important}.m-auto{margin:auto!important}.mt-auto,.my-auto{margin-top:auto!important}.mr-auto,.mx-auto{margin-right:auto!important}.mb-auto,.my-auto{margin-bottom:auto!importa
nt}.ml-auto,.mx-auto{margin-left:auto!important}@media (min-width:540px){.m-sm-0{margin:0!important}.mt-sm-0,.my-sm-0{margin-top:0!important}.mr-sm-0,.mx-sm-0{margin-right:0!important}.mb-sm-0,.my-sm-0{margin-bottom:0!important}.ml-sm-0,.mx-sm-0{margin-left:0!important}.m-sm-1{margin:.25rem!important}.mt-sm-1,.my-sm-1{margin-top:.25rem!important}.mr-sm-1,.mx-sm-1{margin-right:.25rem!important}.mb-sm-1,.my-sm-1{margin-bottom:.25rem!important}.ml-sm-1,.mx-sm-1{margin-left:.25rem!important}.m-sm-2{margin:.5rem!important}.mt-sm-2,.my-sm-2{margin-top:.5rem!important}.mr-sm-2,.mx-sm-2{margin-right:.5rem!important}.mb-sm-2,.my-sm-2{margin-bottom:.5rem!important}.ml-sm-2,.mx-sm-2{margin-left:.5rem!important}.m-sm-3{margin:1rem!important}.mt-sm-3,.my-sm-3{margin-top:1rem!important}.mr-sm-3,.mx-sm-3{margin-right:1rem!important}.mb-sm-3,.my-sm-3{margin-bottom:1rem!important}.ml-sm-3,.mx-sm-3{margin-left:1rem!important}.m-sm-4{margin:1.5rem!important}.mt-sm-4,.my-sm-4{margin-top:1.5rem!important}.mr-sm-4,.mx-sm-4{margin-right:1.5rem!important}.mb-sm-4,.my-sm-4{margin-bottom:1.5rem!important}.ml-sm-4,.mx-sm-4{margin-left:1.5rem!important}.m-sm-5{margin:3rem!important}.mt-sm-5,.my-sm-5{margin-top:3rem!important}.mr-sm-5,.mx-sm-5{margin-right:3rem!important}.mb-sm-5,.my-sm-5{margin-bottom:3rem!important}.ml-sm-5,.mx-sm-5{margin-left:3rem!important}.p-sm-0{padding:0!important}.pt-sm-0,.py-sm-0{padding-top:0!important}.pr-sm-0,.px-sm-0{padding-right:0!important}.pb-sm-0,.py-sm-0{padding-bottom:0!important}.pl-sm-0,.px-sm-0{padding-left:0!important}.p-sm-1{padding:.25rem!important}.pt-sm-1,.py-sm-1{padding-top:.25rem!important}.pr-sm-1,.px-sm-1{padding-right:.25rem!important}.pb-sm-1,.py-sm-1{padding-bottom:.25rem!important}.pl-sm-1,.px-sm-1{padding-left:.25rem!important}.p-sm-2{padding:.5rem!important}.pt-sm-2,.py-sm-2{padding-top:.5rem!important}.pr-sm-2,.px-sm-2{padding-right:.5rem!important}.pb-sm-2,.py-sm-2{padding-bottom:.5rem!important}.pl-sm-2,.px-sm-2{padding-left:.5rem!impo
rtant}.p-sm-3{padding:1rem!important}.pt-sm-3,.py-sm-3{padding-top:1rem!important}.pr-sm-3,.px-sm-3{padding-right:1rem!important}.pb-sm-3,.py-sm-3{padding-bottom:1rem!important}.pl-sm-3,.px-sm-3{padding-left:1rem!important}.p-sm-4{padding:1.5rem!important}.pt-sm-4,.py-sm-4{padding-top:1.5rem!important}.pr-sm-4,.px-sm-4{padding-right:1.5rem!important}.pb-sm-4,.py-sm-4{padding-bottom:1.5rem!important}.pl-sm-4,.px-sm-4{padding-left:1.5rem!important}.p-sm-5{padding:3rem!important}.pt-sm-5,.py-sm-5{padding-top:3rem!important}.pr-sm-5,.px-sm-5{padding-right:3rem!important}.pb-sm-5,.py-sm-5{padding-bottom:3rem!important}.pl-sm-5,.px-sm-5{padding-left:3rem!important}.m-sm-n1{margin:-.25rem!important}.mt-sm-n1,.my-sm-n1{margin-top:-.25rem!important}.mr-sm-n1,.mx-sm-n1{margin-right:-.25rem!important}.mb-sm-n1,.my-sm-n1{margin-bottom:-.25rem!important}.ml-sm-n1,.mx-sm-n1{margin-left:-.25rem!important}.m-sm-n2{margin:-.5rem!important}.mt-sm-n2,.my-sm-n2{margin-top:-.5rem!important}.mr-sm-n2,.mx-sm-n2{margin-right:-.5rem!important}.mb-sm-n2,.my-sm-n2{margin-bottom:-.5rem!important}.ml-sm-n2,.mx-sm-n2{margin-left:-.5rem!important}.m-sm-n3{margin:-1rem!important}.mt-sm-n3,.my-sm-n3{margin-top:-1rem!important}.mr-sm-n3,.mx-sm-n3{margin-right:-1rem!important}.mb-sm-n3,.my-sm-n3{margin-bottom:-1rem!important}.ml-sm-n3,.mx-sm-n3{margin-left:-1rem!important}.m-sm-n4{margin:-1.5rem!important}.mt-sm-n4,.my-sm-n4{margin-top:-1.5rem!important}.mr-sm-n4,.mx-sm-n4{margin-right:-1.5rem!important}.mb-sm-n4,.my-sm-n4{margin-bottom:-1.5rem!important}.ml-sm-n4,.mx-sm-n4{margin-left:-1.5rem!important}.m-sm-n5{margin:-3rem!important}.mt-sm-n5,.my-sm-n5{margin-top:-3rem!important}.mr-sm-n5,.mx-sm-n5{margin-right:-3rem!important}.mb-sm-n5,.my-sm-n5{margin-bottom:-3rem!important}.ml-sm-n5,.mx-sm-n5{margin-left:-3rem!important}.m-sm-auto{margin:auto!important}.mt-sm-auto,.my-sm-auto{margin-top:auto!important}.mr-sm-auto,.mx-sm-auto{margin-right:auto!important}.mb-sm-auto,.my-sm-auto{margin-bottom:auto!
important}.ml-sm-auto,.mx-sm-auto{margin-left:auto!important}}@media (min-width:720px){.m-md-0{margin:0!important}.mt-md-0,.my-md-0{margin-top:0!important}.mr-md-0,.mx-md-0{margin-right:0!important}.mb-md-0,.my-md-0{margin-bottom:0!important}.ml-md-0,.mx-md-0{margin-left:0!important}.m-md-1{margin:.25rem!important}.mt-md-1,.my-md-1{margin-top:.25rem!important}.mr-md-1,.mx-md-1{margin-right:.25rem!important}.mb-md-1,.my-md-1{margin-bottom:.25rem!important}.ml-md-1,.mx-md-1{margin-left:.25rem!important}.m-md-2{margin:.5rem!important}.mt-md-2,.my-md-2{margin-top:.5rem!important}.mr-md-2,.mx-md-2{margin-right:.5rem!important}.mb-md-2,.my-md-2{margin-bottom:.5rem!important}.ml-md-2,.mx-md-2{margin-left:.5rem!important}.m-md-3{margin:1rem!important}.mt-md-3,.my-md-3{margin-top:1rem!important}.mr-md-3,.mx-md-3{margin-right:1rem!important}.mb-md-3,.my-md-3{margin-bottom:1rem!important}.ml-md-3,.mx-md-3{margin-left:1rem!important}.m-md-4{margin:1.5rem!important}.mt-md-4,.my-md-4{margin-top:1.5rem!important}.mr-md-4,.mx-md-4{margin-right:1.5rem!important}.mb-md-4,.my-md-4{margin-bottom:1.5rem!important}.ml-md-4,.mx-md-4{margin-left:1.5rem!important}.m-md-5{margin:3rem!important}.mt-md-5,.my-md-5{margin-top:3rem!important}.mr-md-5,.mx-md-5{margin-right:3rem!important}.mb-md-5,.my-md-5{margin-bottom:3rem!important}.ml-md-5,.mx-md-5{margin-left:3rem!important}.p-md-0{padding:0!important}.pt-md-0,.py-md-0{padding-top:0!important}.pr-md-0,.px-md-0{padding-right:0!important}.pb-md-0,.py-md-0{padding-bottom:0!important}.pl-md-0,.px-md-0{padding-left:0!important}.p-md-1{padding:.25rem!important}.pt-md-1,.py-md-1{padding-top:.25rem!important}.pr-md-1,.px-md-1{padding-right:.25rem!important}.pb-md-1,.py-md-1{padding-bottom:.25rem!important}.pl-md-1,.px-md-1{padding-left:.25rem!important}.p-md-2{padding:.5rem!important}.pt-md-2,.py-md-2{padding-top:.5rem!important}.pr-md-2,.px-md-2{padding-right:.5rem!important}.pb-md-2,.py-md-2{padding-bottom:.5rem!important}.pl-md-2,.px-md-2{padding-l
eft:.5rem!important}.p-md-3{padding:1rem!important}.pt-md-3,.py-md-3{padding-top:1rem!important}.pr-md-3,.px-md-3{padding-right:1rem!important}.pb-md-3,.py-md-3{padding-bottom:1rem!important}.pl-md-3,.px-md-3{padding-left:1rem!important}.p-md-4{padding:1.5rem!important}.pt-md-4,.py-md-4{padding-top:1.5rem!important}.pr-md-4,.px-md-4{padding-right:1.5rem!important}.pb-md-4,.py-md-4{padding-bottom:1.5rem!important}.pl-md-4,.px-md-4{padding-left:1.5rem!important}.p-md-5{padding:3rem!important}.pt-md-5,.py-md-5{padding-top:3rem!important}.pr-md-5,.px-md-5{padding-right:3rem!important}.pb-md-5,.py-md-5{padding-bottom:3rem!important}.pl-md-5,.px-md-5{padding-left:3rem!important}.m-md-n1{margin:-.25rem!important}.mt-md-n1,.my-md-n1{margin-top:-.25rem!important}.mr-md-n1,.mx-md-n1{margin-right:-.25rem!important}.mb-md-n1,.my-md-n1{margin-bottom:-.25rem!important}.ml-md-n1,.mx-md-n1{margin-left:-.25rem!important}.m-md-n2{margin:-.5rem!important}.mt-md-n2,.my-md-n2{margin-top:-.5rem!important}.mr-md-n2,.mx-md-n2{margin-right:-.5rem!important}.mb-md-n2,.my-md-n2{margin-bottom:-.5rem!important}.ml-md-n2,.mx-md-n2{margin-left:-.5rem!important}.m-md-n3{margin:-1rem!important}.mt-md-n3,.my-md-n3{margin-top:-1rem!important}.mr-md-n3,.mx-md-n3{margin-right:-1rem!important}.mb-md-n3,.my-md-n3{margin-bottom:-1rem!important}.ml-md-n3,.mx-md-n3{margin-left:-1rem!important}.m-md-n4{margin:-1.5rem!important}.mt-md-n4,.my-md-n4{margin-top:-1.5rem!important}.mr-md-n4,.mx-md-n4{margin-right:-1.5rem!important}.mb-md-n4,.my-md-n4{margin-bottom:-1.5rem!important}.ml-md-n4,.mx-md-n4{margin-left:-1.5rem!important}.m-md-n5{margin:-3rem!important}.mt-md-n5,.my-md-n5{margin-top:-3rem!important}.mr-md-n5,.mx-md-n5{margin-right:-3rem!important}.mb-md-n5,.my-md-n5{margin-bottom:-3rem!important}.ml-md-n5,.mx-md-n5{margin-left:-3rem!important}.m-md-auto{margin:auto!important}.mt-md-auto,.my-md-auto{margin-top:auto!important}.mr-md-auto,.mx-md-auto{margin-right:auto!important}.mb-md-auto,.my-md-auto{margi
n-bottom:auto!important}.ml-md-auto,.mx-md-auto{margin-left:auto!important}}@media (min-width:960px){.m-lg-0{margin:0!important}.mt-lg-0,.my-lg-0{margin-top:0!important}.mr-lg-0,.mx-lg-0{margin-right:0!important}.mb-lg-0,.my-lg-0{margin-bottom:0!important}.ml-lg-0,.mx-lg-0{margin-left:0!important}.m-lg-1{margin:.25rem!important}.mt-lg-1,.my-lg-1{margin-top:.25rem!important}.mr-lg-1,.mx-lg-1{margin-right:.25rem!important}.mb-lg-1,.my-lg-1{margin-bottom:.25rem!important}.ml-lg-1,.mx-lg-1{margin-left:.25rem!important}.m-lg-2{margin:.5rem!important}.mt-lg-2,.my-lg-2{margin-top:.5rem!important}.mr-lg-2,.mx-lg-2{margin-right:.5rem!important}.mb-lg-2,.my-lg-2{margin-bottom:.5rem!important}.ml-lg-2,.mx-lg-2{margin-left:.5rem!important}.m-lg-3{margin:1rem!important}.mt-lg-3,.my-lg-3{margin-top:1rem!important}.mr-lg-3,.mx-lg-3{margin-right:1rem!important}.mb-lg-3,.my-lg-3{margin-bottom:1rem!important}.ml-lg-3,.mx-lg-3{margin-left:1rem!important}.m-lg-4{margin:1.5rem!important}.mt-lg-4,.my-lg-4{margin-top:1.5rem!important}.mr-lg-4,.mx-lg-4{margin-right:1.5rem!important}.mb-lg-4,.my-lg-4{margin-bottom:1.5rem!important}.ml-lg-4,.mx-lg-4{margin-left:1.5rem!important}.m-lg-5{margin:3rem!important}.mt-lg-5,.my-lg-5{margin-top:3rem!important}.mr-lg-5,.mx-lg-5{margin-right:3rem!important}.mb-lg-5,.my-lg-5{margin-bottom:3rem!important}.ml-lg-5,.mx-lg-5{margin-left:3rem!important}.p-lg-0{padding:0!important}.pt-lg-0,.py-lg-0{padding-top:0!important}.pr-lg-0,.px-lg-0{padding-right:0!important}.pb-lg-0,.py-lg-0{padding-bottom:0!important}.pl-lg-0,.px-lg-0{padding-left:0!important}.p-lg-1{padding:.25rem!important}.pt-lg-1,.py-lg-1{padding-top:.25rem!important}.pr-lg-1,.px-lg-1{padding-right:.25rem!important}.pb-lg-1,.py-lg-1{padding-bottom:.25rem!important}.pl-lg-1,.px-lg-1{padding-left:.25rem!important}.p-lg-2{padding:.5rem!important}.pt-lg-2,.py-lg-2{padding-top:.5rem!important}.pr-lg-2,.px-lg-2{padding-right:.5rem!important}.pb-lg-2,.py-lg-2{padding-bottom:.5rem!important}.pl-lg-2,.px-
lg-2{padding-left:.5rem!important}.p-lg-3{padding:1rem!important}.pt-lg-3,.py-lg-3{padding-top:1rem!important}.pr-lg-3,.px-lg-3{padding-right:1rem!important}.pb-lg-3,.py-lg-3{padding-bottom:1rem!important}.pl-lg-3,.px-lg-3{padding-left:1rem!important}.p-lg-4{padding:1.5rem!important}.pt-lg-4,.py-lg-4{padding-top:1.5rem!important}.pr-lg-4,.px-lg-4{padding-right:1.5rem!important}.pb-lg-4,.py-lg-4{padding-bottom:1.5rem!important}.pl-lg-4,.px-lg-4{padding-left:1.5rem!important}.p-lg-5{padding:3rem!important}.pt-lg-5,.py-lg-5{padding-top:3rem!important}.pr-lg-5,.px-lg-5{padding-right:3rem!important}.pb-lg-5,.py-lg-5{padding-bottom:3rem!important}.pl-lg-5,.px-lg-5{padding-left:3rem!important}.m-lg-n1{margin:-.25rem!important}.mt-lg-n1,.my-lg-n1{margin-top:-.25rem!important}.mr-lg-n1,.mx-lg-n1{margin-right:-.25rem!important}.mb-lg-n1,.my-lg-n1{margin-bottom:-.25rem!important}.ml-lg-n1,.mx-lg-n1{margin-left:-.25rem!important}.m-lg-n2{margin:-.5rem!important}.mt-lg-n2,.my-lg-n2{margin-top:-.5rem!important}.mr-lg-n2,.mx-lg-n2{margin-right:-.5rem!important}.mb-lg-n2,.my-lg-n2{margin-bottom:-.5rem!important}.ml-lg-n2,.mx-lg-n2{margin-left:-.5rem!important}.m-lg-n3{margin:-1rem!important}.mt-lg-n3,.my-lg-n3{margin-top:-1rem!important}.mr-lg-n3,.mx-lg-n3{margin-right:-1rem!important}.mb-lg-n3,.my-lg-n3{margin-bottom:-1rem!important}.ml-lg-n3,.mx-lg-n3{margin-left:-1rem!important}.m-lg-n4{margin:-1.5rem!important}.mt-lg-n4,.my-lg-n4{margin-top:-1.5rem!important}.mr-lg-n4,.mx-lg-n4{margin-right:-1.5rem!important}.mb-lg-n4,.my-lg-n4{margin-bottom:-1.5rem!important}.ml-lg-n4,.mx-lg-n4{margin-left:-1.5rem!important}.m-lg-n5{margin:-3rem!important}.mt-lg-n5,.my-lg-n5{margin-top:-3rem!important}.mr-lg-n5,.mx-lg-n5{margin-right:-3rem!important}.mb-lg-n5,.my-lg-n5{margin-bottom:-3rem!important}.ml-lg-n5,.mx-lg-n5{margin-left:-3rem!important}.m-lg-auto{margin:auto!important}.mt-lg-auto,.my-lg-auto{margin-top:auto!important}.mr-lg-auto,.mx-lg-auto{margin-right:auto!important}.mb-lg-auto,.my
-lg-auto{margin-bottom:auto!important}.ml-lg-auto,.mx-lg-auto{margin-left:auto!important}}@media (min-width:1200px){.m-xl-0{margin:0!important}.mt-xl-0,.my-xl-0{margin-top:0!important}.mr-xl-0,.mx-xl-0{margin-right:0!important}.mb-xl-0,.my-xl-0{margin-bottom:0!important}.ml-xl-0,.mx-xl-0{margin-left:0!important}.m-xl-1{margin:.25rem!important}.mt-xl-1,.my-xl-1{margin-top:.25rem!important}.mr-xl-1,.mx-xl-1{margin-right:.25rem!important}.mb-xl-1,.my-xl-1{margin-bottom:.25rem!important}.ml-xl-1,.mx-xl-1{margin-left:.25rem!important}.m-xl-2{margin:.5rem!important}.mt-xl-2,.my-xl-2{margin-top:.5rem!important}.mr-xl-2,.mx-xl-2{margin-right:.5rem!important}.mb-xl-2,.my-xl-2{margin-bottom:.5rem!important}.ml-xl-2,.mx-xl-2{margin-left:.5rem!important}.m-xl-3{margin:1rem!important}.mt-xl-3,.my-xl-3{margin-top:1rem!important}.mr-xl-3,.mx-xl-3{margin-right:1rem!important}.mb-xl-3,.my-xl-3{margin-bottom:1rem!important}.ml-xl-3,.mx-xl-3{margin-left:1rem!important}.m-xl-4{margin:1.5rem!important}.mt-xl-4,.my-xl-4{margin-top:1.5rem!important}.mr-xl-4,.mx-xl-4{margin-right:1.5rem!important}.mb-xl-4,.my-xl-4{margin-bottom:1.5rem!important}.ml-xl-4,.mx-xl-4{margin-left:1.5rem!important}.m-xl-5{margin:3rem!important}.mt-xl-5,.my-xl-5{margin-top:3rem!important}.mr-xl-5,.mx-xl-5{margin-right:3rem!important}.mb-xl-5,.my-xl-5{margin-bottom:3rem!important}.ml-xl-5,.mx-xl-5{margin-left:3rem!important}.p-xl-0{padding:0!important}.pt-xl-0,.py-xl-0{padding-top:0!important}.pr-xl-0,.px-xl-0{padding-right:0!important}.pb-xl-0,.py-xl-0{padding-bottom:0!important}.pl-xl-0,.px-xl-0{padding-left:0!important}.p-xl-1{padding:.25rem!important}.pt-xl-1,.py-xl-1{padding-top:.25rem!important}.pr-xl-1,.px-xl-1{padding-right:.25rem!important}.pb-xl-1,.py-xl-1{padding-bottom:.25rem!important}.pl-xl-1,.px-xl-1{padding-left:.25rem!important}.p-xl-2{padding:.5rem!important}.pt-xl-2,.py-xl-2{padding-top:.5rem!important}.pr-xl-2,.px-xl-2{padding-right:.5rem!important}.pb-xl-2,.py-xl-2{padding-bottom:.5rem!importan
t}.pl-xl-2,.px-xl-2{padding-left:.5rem!important}.p-xl-3{padding:1rem!important}.pt-xl-3,.py-xl-3{padding-top:1rem!important}.pr-xl-3,.px-xl-3{padding-right:1rem!important}.pb-xl-3,.py-xl-3{padding-bottom:1rem!important}.pl-xl-3,.px-xl-3{padding-left:1rem!important}.p-xl-4{padding:1.5rem!important}.pt-xl-4,.py-xl-4{padding-top:1.5rem!important}.pr-xl-4,.px-xl-4{padding-right:1.5rem!important}.pb-xl-4,.py-xl-4{padding-bottom:1.5rem!important}.pl-xl-4,.px-xl-4{padding-left:1.5rem!important}.p-xl-5{padding:3rem!important}.pt-xl-5,.py-xl-5{padding-top:3rem!important}.pr-xl-5,.px-xl-5{padding-right:3rem!important}.pb-xl-5,.py-xl-5{padding-bottom:3rem!important}.pl-xl-5,.px-xl-5{padding-left:3rem!important}.m-xl-n1{margin:-.25rem!important}.mt-xl-n1,.my-xl-n1{margin-top:-.25rem!important}.mr-xl-n1,.mx-xl-n1{margin-right:-.25rem!important}.mb-xl-n1,.my-xl-n1{margin-bottom:-.25rem!important}.ml-xl-n1,.mx-xl-n1{margin-left:-.25rem!important}.m-xl-n2{margin:-.5rem!important}.mt-xl-n2,.my-xl-n2{margin-top:-.5rem!important}.mr-xl-n2,.mx-xl-n2{margin-right:-.5rem!important}.mb-xl-n2,.my-xl-n2{margin-bottom:-.5rem!important}.ml-xl-n2,.mx-xl-n2{margin-left:-.5rem!important}.m-xl-n3{margin:-1rem!important}.mt-xl-n3,.my-xl-n3{margin-top:-1rem!important}.mr-xl-n3,.mx-xl-n3{margin-right:-1rem!important}.mb-xl-n3,.my-xl-n3{margin-bottom:-1rem!important}.ml-xl-n3,.mx-xl-n3{margin-left:-1rem!important}.m-xl-n4{margin:-1.5rem!important}.mt-xl-n4,.my-xl-n4{margin-top:-1.5rem!important}.mr-xl-n4,.mx-xl-n4{margin-right:-1.5rem!important}.mb-xl-n4,.my-xl-n4{margin-bottom:-1.5rem!important}.ml-xl-n4,.mx-xl-n4{margin-left:-1.5rem!important}.m-xl-n5{margin:-3rem!important}.mt-xl-n5,.my-xl-n5{margin-top:-3rem!important}.mr-xl-n5,.mx-xl-n5{margin-right:-3rem!important}.mb-xl-n5,.my-xl-n5{margin-bottom:-3rem!important}.ml-xl-n5,.mx-xl-n5{margin-left:-3rem!important}.m-xl-auto{margin:auto!important}.mt-xl-auto,.my-xl-auto{margin-top:auto!important}.mr-xl-auto,.mx-xl-auto{margin-right:auto!important}
.mb-xl-auto,.my-xl-auto{margin-bottom:auto!important}.ml-xl-auto,.mx-xl-auto{margin-left:auto!important}}.stretched-link:after{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;pointer-events:auto;content:"";background-color:transparent}.text-monospace{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace!important}.text-justify{text-align:justify!important}.text-wrap{white-space:normal!important}.text-nowrap{white-space:nowrap!important}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.text-left{text-align:left!important}.text-right{text-align:right!important}.text-center{text-align:center!important}@media (min-width:540px){.text-sm-left{text-align:left!important}.text-sm-right{text-align:right!important}.text-sm-center{text-align:center!important}}@media (min-width:720px){.text-md-left{text-align:left!important}.text-md-right{text-align:right!important}.text-md-center{text-align:center!important}}@media (min-width:960px){.text-lg-left{text-align:left!important}.text-lg-right{text-align:right!important}.text-lg-center{text-align:center!important}}@media 
(min-width:1200px){.text-xl-left{text-align:left!important}.text-xl-right{text-align:right!important}.text-xl-center{text-align:center!important}}.text-lowercase{text-transform:lowercase!important}.text-uppercase{text-transform:uppercase!important}.text-capitalize{text-transform:capitalize!important}.font-weight-light{font-weight:300!important}.font-weight-lighter{font-weight:lighter!important}.font-weight-normal{font-weight:400!important}.font-weight-bold{font-weight:700!important}.font-weight-bolder{font-weight:bolder!important}.font-italic{font-style:italic!important}.text-white{color:#fff!important}.text-primary{color:#007bff!important}a.text-primary:focus,a.text-primary:hover{color:#0056b3!important}.text-secondary{color:#6c757d!important}a.text-secondary:focus,a.text-secondary:hover{color:#494f54!important}.text-success{color:#28a745!important}a.text-success:focus,a.text-success:hover{color:#19692c!important}.text-info{color:#17a2b8!important}a.text-info:focus,a.text-info:hover{color:#0f6674!important}.text-warning{color:#ffc107!important}a.text-warning:focus,a.text-warning:hover{color:#ba8b00!important}.text-danger{color:#dc3545!important}a.text-danger:focus,a.text-danger:hover{color:#a71d2a!important}.text-light{color:#f8f9fa!important}a.text-light:focus,a.text-light:hover{color:#cbd3da!important}.text-dark{color:#343a40!important}a.text-dark:focus,a.text-dark:hover{color:#121416!important}.text-body{color:#212529!important}.text-muted{color:#6c757d!important}.text-black-50{color:rgba(0,0,0,.5)!important}.text-white-50{color:hsla(0,0%,100%,.5)!important}.text-hide{font:0/0 a;color:transparent;text-shadow:none;background-color:transparent;border:0}.text-decoration-none{text-decoration:none!important}.text-break{word-break:break-word!important;word-wrap:break-word!important}.text-reset{color:inherit!important}.visible{visibility:visible!important}.invisible{visibility:hidden!important}@media 
print{*,:after,:before{text-shadow:none!important;box-shadow:none!important}a:not(.btn){text-decoration:underline}abbr[title]:after{content:" (" attr(title) ")"}pre{white-space:pre-wrap!important}blockquote,pre{border:1px solid #adb5bd}blockquote,img,pre,tr{page-break-inside:avoid}h2,h3,p{orphans:3;widows:3}h2,h3{page-break-after:avoid}@page{size:a3}.container,body{min-width:960px!important}.navbar{display:none}.badge{border:1px solid #000}.table{border-collapse:collapse!important}.table td,.table th{background-color:#fff!important}.table-bordered td,.table-bordered th{border:1px solid #dee2e6!important}.table-dark{color:inherit}.table-dark tbody+tbody,.table-dark td,.table-dark th,.table-dark thead th{border-color:#dee2e6}.table .thead-dark th{color:inherit;border-color:#dee2e6}}html{font-size:var(--pst-font-size-base);scroll-padding-top:calc(var(--pst-header-height) + 12px)}body{padding-top:calc(var(--pst-header-height) + 20px);background-color:#fff;font-family:var(--pst-font-family-base);font-weight:400;line-height:1.65;color:rgba(var(--pst-color-text-base),1)}p{margin-bottom:1.15rem;font-size:1em;color:rgba(var(--pst-color-paragraph),1)}p.rubric{border-bottom:1px solid #c9c9c9}a{color:rgba(var(--pst-color-link),1);text-decoration:none}a:hover{color:rgba(var(--pst-color-link-hover),1);text-decoration:underline}a.headerlink{color:rgba(var(--pst-color-headerlink),1);font-size:.8em;padding:0 4px;text-decoration:none}a.headerlink:hover{background-color:rgba(var(--pst-color-headerlink),1);color:rgba(var(--pst-color-headerlink-hover),1)}.heading-style,h1,h2,h3,h4,h5,h6{margin:2.75rem 0 
1.05rem;font-family:var(--pst-font-family-heading);font-weight:400;line-height:1.15}h1{margin-top:0;font-size:var(--pst-font-size-h1);color:rgba(var(--pst-color-h1),1)}h2{font-size:var(--pst-font-size-h2);color:rgba(var(--pst-color-h2),1)}h3{font-size:var(--pst-font-size-h3);color:rgba(var(--pst-color-h3),1)}h4{font-size:var(--pst-font-size-h4);color:rgba(var(--pst-color-h4),1)}h5{font-size:var(--pst-font-size-h5);color:rgba(var(--pst-color-h5),1)}h6{font-size:var(--pst-font-size-h6);color:rgba(var(--pst-color-h6),1)}.text_small,small{font-size:var(--pst-font-size-milli)}hr{border:0;border-top:1px solid #e5e5e5}code,kbd,pre,samp{font-family:var(--pst-font-family-monospace)}code{color:rgba(var(--pst-color-inline-code),1)}pre{margin:1.5em 0;padding:10px;background-color:rgba(var(--pst-color-preformatted-background),1);color:rgba(var(--pst-color-preformatted-text),1);line-height:1.2em;border:1px solid #c9c9c9;border-radius:.2rem;box-shadow:1px 1px 1px #d8d8d8}dd{margin-top:3px;margin-bottom:10px;margin-left:30px}dl.field-list{display:grid;grid-template-columns:fit-content(30%) minmax(0,1fr)}.navbar{position:fixed;min-height:var(--pst-header-height);width:100%;padding:0}.navbar .container-xl{height:100%}@media (max-width:1199.98px){.navbar #navbar-start{margin-left:1em}.navbar button.navbar-toggler{margin-right:1em}}@media (min-width:960px){.navbar #navbar-end>.navbar-end-item{display:inline-block}}.navbar-brand{position:relative;height:var(--pst-header-height);width:auto;padding:.5rem 0;display:flex;align-items:center}.navbar-brand p{margin-bottom:0}.navbar-brand img{max-width:100%;height:100%;width:auto}.navbar-light{background:#fff!important;box-shadow:0 .125rem .25rem 0 rgba(0,0,0,.11)}.navbar-light .navbar-nav li a.nav-link{padding:0 .5rem;color:rgba(var(--pst-color-navbar-link),1)}.navbar-light .navbar-nav li a.nav-link:hover{color:rgba(var(--pst-color-navbar-link-hover),1)}.navbar-light 
.navbar-nav>.active>.nav-link{font-weight:600;color:rgba(var(--pst-color-navbar-link-active),1)}.navbar-header a{padding:0 15px}.admonition,div.admonition{margin:1.5625em auto;padding:0 .6rem .8rem;overflow:hidden;page-break-inside:avoid;border-left:.2rem solid;border-left-color:rgba(var(--pst-color-admonition-default),1);border-bottom-color:rgba(var(--pst-color-admonition-default),1);border-right-color:rgba(var(--pst-color-admonition-default),1);border-top-color:rgba(var(--pst-color-admonition-default),1);border-radius:.2rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1);transition:color .25s,background-color .25s,border-color .25s}.admonition :last-child,div.admonition :last-child{margin-bottom:0}.admonition p.admonition-title~*,div.admonition p.admonition-title~*{margin-left:1.4rem;margin-right:1.4rem}.admonition>ol,.admonition>ul,div.admonition>ol,div.admonition>ul{margin-left:1em}.admonition>.admonition-title,div.admonition>.admonition-title{position:relative;margin:0 -.6rem;padding:.4rem .6rem .4rem 2rem;font-weight:700;background-color:rgba(var(--pst-color-admonition-default),.1)}.admonition>.admonition-title:before,div.admonition>.admonition-title:before{position:absolute;left:.6rem;width:1rem;height:1rem;color:rgba(var(--pst-color-admonition-default),1);font-family:Font Awesome\ 5 
Free;font-weight:900;content:var(--pst-icon-admonition-default)}.admonition>.admonition-title+*,div.admonition>.admonition-title+*{margin-top:.4em}.admonition.attention,div.admonition.attention{border-color:rgba(var(--pst-color-admonition-attention),1)}.admonition.attention>.admonition-title,div.admonition.attention>.admonition-title{background-color:rgba(var(--pst-color-admonition-attention),.1)}.admonition.attention>.admonition-title:before,div.admonition.attention>.admonition-title:before{color:rgba(var(--pst-color-admonition-attention),1);content:var(--pst-icon-admonition-attention)}.admonition.caution,div.admonition.caution{border-color:rgba(var(--pst-color-admonition-caution),1)}.admonition.caution>.admonition-title,div.admonition.caution>.admonition-title{background-color:rgba(var(--pst-color-admonition-caution),.1)}.admonition.caution>.admonition-title:before,div.admonition.caution>.admonition-title:before{color:rgba(var(--pst-color-admonition-caution),1);content:var(--pst-icon-admonition-caution)}.admonition.warning,div.admonition.warning{border-color:rgba(var(--pst-color-admonition-warning),1)}.admonition.warning>.admonition-title,div.admonition.warning>.admonition-title{background-color:rgba(var(--pst-color-admonition-warning),.1)}.admonition.warning>.admonition-title:before,div.admonition.warning>.admonition-title:before{color:rgba(var(--pst-color-admonition-warning),1);content:var(--pst-icon-admonition-warning)}.admonition.danger,div.admonition.danger{border-color:rgba(var(--pst-color-admonition-danger),1)}.admonition.danger>.admonition-title,div.admonition.danger>.admonition-title{background-color:rgba(var(--pst-color-admonition-danger),.1)}.admonition.danger>.admonition-title:before,div.admonition.danger>.admonition-title:before{color:rgba(var(--pst-color-admonition-danger),1);content:var(--pst-icon-admonition-danger)}.admonition.error,div.admonition.error{border-color:rgba(var(--pst-color-admonition-error),1)}.admonition.error>.admonition-title,div.a
dmonition.error>.admonition-title{background-color:rgba(var(--pst-color-admonition-error),.1)}.admonition.error>.admonition-title:before,div.admonition.error>.admonition-title:before{color:rgba(var(--pst-color-admonition-error),1);content:var(--pst-icon-admonition-error)}.admonition.hint,div.admonition.hint{border-color:rgba(var(--pst-color-admonition-hint),1)}.admonition.hint>.admonition-title,div.admonition.hint>.admonition-title{background-color:rgba(var(--pst-color-admonition-hint),.1)}.admonition.hint>.admonition-title:before,div.admonition.hint>.admonition-title:before{color:rgba(var(--pst-color-admonition-hint),1);content:var(--pst-icon-admonition-hint)}.admonition.tip,div.admonition.tip{border-color:rgba(var(--pst-color-admonition-tip),1)}.admonition.tip>.admonition-title,div.admonition.tip>.admonition-title{background-color:rgba(var(--pst-color-admonition-tip),.1)}.admonition.tip>.admonition-title:before,div.admonition.tip>.admonition-title:before{color:rgba(var(--pst-color-admonition-tip),1);content:var(--pst-icon-admonition-tip)}.admonition.important,div.admonition.important{border-color:rgba(var(--pst-color-admonition-important),1)}.admonition.important>.admonition-title,div.admonition.important>.admonition-title{background-color:rgba(var(--pst-color-admonition-important),.1)}.admonition.important>.admonition-title:before,div.admonition.important>.admonition-title:before{color:rgba(var(--pst-color-admonition-important),1);content:var(--pst-icon-admonition-important)}.admonition.note,div.admonition.note{border-color:rgba(var(--pst-color-admonition-note),1)}.admonition.note>.admonition-title,div.admonition.note>.admonition-title{background-color:rgba(var(--pst-color-admonition-note),.1)}.admonition.note>.admonition-title:before,div.admonition.note>.admonition-title:before{color:rgba(var(--pst-color-admonition-note),1);content:var(--pst-icon-admonition-note)}table.field-list{border-collapse:separate;border-spacing:10px;margin-left:1px}table.field-list 
th.field-name{padding:1px 8px 1px 5px;white-space:nowrap;background-color:#eee}table.field-list td.field-body p{font-style:italic}table.field-list td.field-body p>strong{font-style:normal}table.field-list td.field-body blockquote{border-left:none;margin:0 0 .3em;padding-left:30px}.table.autosummary td:first-child{white-space:nowrap}.sig{font-family:var(--pst-font-family-monospace)}.sig-inline.c-texpr,.sig-inline.cpp-texpr{font-family:unset}.sig.c .k,.sig.c .kt,.sig.c .m,.sig.c .s,.sig.c .sc,.sig.cpp .k,.sig.cpp .kt,.sig.cpp .m,.sig.cpp .s,.sig.cpp .sc{color:rgba(var(--pst-color-text-base),1)}.sig-name{color:rgba(var(--pst-color-inline-code),1)}td.text-align\:left,th.text-align\:left{text-align:left}td.text-align\:right,th.text-align\:right{text-align:right}td.text-align\:center,th.text-align\:center{text-align:center}div.deprecated,div.versionadded,div.versionchanged{vertical-align:middle;margin:1.5625em auto;padding:0 .6rem;overflow:hidden;page-break-inside:avoid;border-left:.2rem solid;border-left-color:rgba(var(--pst-color-versionmodified-default),1);border-bottom-color:rgba(var(--pst-color-versionmodified-default),1);border-right-color:rgba(var(--pst-color-versionmodified-default),1);border-top-color:rgba(var(--pst-color-versionmodified-default),1);border-radius:.2rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1);transition:color .25s,background-color .25s,border-color 
.25s;background-color:rgba(var(--pst-color-admonition-default),.1)}div.deprecated>p,div.versionadded>p,div.versionchanged>p{margin-bottom:.6rem;margin-top:.6rem}div.versionadded{border-color:rgba(var(--pst-color-versionmodified-added),1);background-color:rgba(var(--pst-color-versionmodified-added),.1)}div.versionchanged{border-color:rgba(var(--pst-color-versionmodified-changed),1);background-color:rgba(var(--pst-color-versionmodified-changed),.1)}div.deprecated{border-color:rgba(var(--pst-color-versionmodified-deprecated),1);background-color:rgba(var(--pst-color-versionmodified-deprecated),.1)}span.versionmodified{font-weight:700}span.versionmodified:before{font-style:normal;margin-right:.6rem;color:rgba(var(--pst-color-versionmodified-default),1);font-family:Font Awesome\ 5 Free;font-weight:900;content:var(--pst-icon-versionmodified-default)}span.versionmodified.added:before{color:rgba(var(--pst-color-versionmodified-added),1);content:var(--pst-icon-versionmodified-added)}span.versionmodified.changed:before{color:rgba(var(--pst-color-versionmodified-changed),1);content:var(--pst-icon-versionmodified-changed)}span.versionmodified.deprecated:before{color:rgba(var(--pst-color-versionmodified-deprecated),1);content:var(--pst-icon-versionmodified-deprecated)}blockquote{padding:0 1em;color:#6a737d;border-left:.25em solid #dfe2e5}dt.label>span.brackets:not(:only-child):before{content:"["}dt.label>span.brackets:not(:only-child):after{content:"]"}a.footnote-reference{vertical-align:super;font-size:small}.topic{background-color:#eee}.seealso dd{margin-top:0;margin-bottom:0}.viewcode-back{font-family:var(--pst-font-family-base)}.viewcode-block:target{background-color:#f4debf;border-top:1px solid #ac9;border-bottom:1px solid #ac9}span.guilabel{border:1px solid #7fbbe3;background:#e7f2fa;font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}footer{width:100%;border-top:1px solid #ccc;padding:10px}footer .footer-item 
p{margin-bottom:0}.bd-search{position:relative;padding:1rem 15px;margin-right:-15px;margin-left:-15px}.bd-search .icon{position:absolute;color:#a4a6a7;left:25px;top:25px}.bd-search input{border-radius:0;border:0;border-bottom:1px solid #e5e5e5;padding-left:35px}.bd-toc{-ms-flex-order:2;order:2;height:calc(100vh - 2rem);overflow-y:auto}@supports (position:-webkit-sticky) or (position:sticky){.bd-toc{position:-webkit-sticky;position:sticky;top:calc(var(--pst-header-height) + 20px);height:calc(100vh - 5rem);overflow-y:auto}}.bd-toc .onthispage{color:#a4a6a7}.section-nav{padding-left:0;border-left:1px solid #eee;border-bottom:none}.section-nav ul{padding-left:1rem}.toc-entry,.toc-entry a{display:block}.toc-entry a{padding:.125rem 1.5rem;color:rgba(var(--pst-color-toc-link),1)}@media (min-width:1200px){.toc-entry a{padding-right:0}}.toc-entry a:hover{color:rgba(var(--pst-color-toc-link-hover),1);text-decoration:none}.bd-sidebar{padding-top:1em;overflow-y:auto;display:flex;flex-direction:column}@media (min-width:720px){.bd-sidebar{border-right:1px solid rgba(0,0,0,.1)}@supports (position:-webkit-sticky) or (position:sticky){.bd-sidebar{position:-webkit-sticky;position:sticky;top:calc(var(--pst-header-height) + 20px);z-index:1000;height:calc(100vh - var(--pst-header-height) - 20px)}}}.bd-sidebar.no-sidebar{border-right:0}.bd-sidebar .sidebar-end-items{margin-top:auto;margin-bottom:1em}.bd-links{padding-top:1rem;padding-bottom:1rem;margin-right:-15px;margin-left:-15px}@media (min-width:720px){.bd-links{display:block}}.bd-sidenav{display:none}.bd-content{padding-top:20px}.bd-content .section{max-width:100%}.bd-content .section table{display:block;overflow:auto}.bd-toc-link{display:block;padding:.25rem 
1.5rem;font-weight:600;color:rgba(0,0,0,.65)}.bd-toc-link:hover{color:rgba(0,0,0,.85);text-decoration:none}.bd-toc-item.active{margin-bottom:1rem}.bd-toc-item.active:not(:first-child){margin-top:1rem}.bd-toc-item.active>.bd-toc-link{color:rgba(0,0,0,.85)}.bd-toc-item.active>.bd-toc-link:hover{background-color:transparent}.bd-toc-item.active>.bd-sidenav{display:block}nav.bd-links p.caption{font-size:var(--pst-sidebar-caption-font-size);text-transform:uppercase;font-weight:700;position:relative;margin-top:1.25em;margin-bottom:.5em;padding:0 1.5rem;color:rgba(var(--pst-color-sidebar-caption),1)}nav.bd-links p.caption:first-child{margin-top:0}.bd-sidebar .nav{font-size:var(--pst-sidebar-font-size)}.bd-sidebar .nav ul{list-style:none;padding:0 0 0 1.5rem}.bd-sidebar .nav li>a{display:block;padding:.25rem 1.5rem;color:rgba(var(--pst-color-sidebar-link),1)}.bd-sidebar .nav li>a:hover{color:rgba(var(--pst-color-sidebar-link-hover),1);text-decoration:none;background-color:transparent}.bd-sidebar .nav li>a.reference.external:after{font-family:Font Awesome\ 5 Free;font-weight:900;content:"\f35d";font-size:.75em;margin-left:.3em}.bd-sidebar .nav .active:hover>a,.bd-sidebar .nav .active>a{font-weight:600;color:rgba(var(--pst-color-sidebar-link-active),1)}.toc-h2{font-size:.85rem}.toc-h3{font-size:.75rem}.toc-h4{font-size:.65rem}.toc-entry>.nav-link.active{font-weight:600;color:#130654;color:rgba(var(--pst-color-toc-link-active),1);background-color:transparent;border-left:2px solid rgba(var(--pst-color-toc-link-active),1)}.nav-link:hover{border-style:none}#navbar-main-elements li.nav-item i{font-size:.7rem;padding-left:2px;vertical-align:middle}.bd-toc .nav .nav{display:none}.bd-toc .nav .nav.visible,.bd-toc .nav>.active>ul{display:block}.prev-next-area{margin:20px 0}.prev-next-area p{margin:0 .3em;line-height:1.3em}.prev-next-area i{font-size:1.2em}.prev-next-area 
a{display:flex;align-items:center;border:none;padding:10px;max-width:45%;overflow-x:hidden;color:rgba(0,0,0,.65);text-decoration:none}.prev-next-area a p.prev-next-title{color:rgba(var(--pst-color-link),1);font-weight:600;font-size:1.1em}.prev-next-area a:hover p.prev-next-title{text-decoration:underline}.prev-next-area a .prev-next-info{flex-direction:column;margin:0 .5em}.prev-next-area a .prev-next-info .prev-next-subtitle{text-transform:capitalize}.prev-next-area a.left-prev{float:left}.prev-next-area a.right-next{float:right}.prev-next-area a.right-next div.prev-next-info{text-align:right}.alert{padding-bottom:0}.alert-info a{color:#e83e8c}#navbar-icon-links i.fa,#navbar-icon-links i.fab,#navbar-icon-links i.far,#navbar-icon-links i.fas{vertical-align:middle;font-style:normal;font-size:1.5rem;line-height:1.25}#navbar-icon-links i.fa-github-square:before{color:#333}#navbar-icon-links i.fa-twitter-square:before{color:#55acee}#navbar-icon-links i.fa-gitlab:before{color:#548}#navbar-icon-links i.fa-bitbucket:before{color:#0052cc}#navbar-icon-links img.icon-link-image{height:1.5em}.tocsection{border-left:1px solid #eee;padding:.3rem 1.5rem}.tocsection i{padding-right:.5rem}.editthispage{padding-top:2rem}.editthispage a{color:var(--pst-color-sidebar-link-active)}.xr-wrap[hidden]{display:block!important}.toctree-checkbox{position:absolute;display:none}.toctree-checkbox~ul{display:none}.toctree-checkbox~label i{transform:rotate(0deg)}.toctree-checkbox:checked~ul{display:block}.toctree-checkbox:checked~label i{transform:rotate(180deg)}.bd-sidebar li{position:relative}.bd-sidebar label{position:absolute;top:0;right:0;height:30px;width:30px;cursor:pointer;display:flex;justify-content:center;align-items:center}.bd-sidebar label:hover{background:rgba(var(--pst-color-sidebar-expander-background-hover),1)}.bd-sidebar label i{display:inline-block;font-size:.75rem;text-align:center}.bd-sidebar label i:hover{color:rgba(var(--pst-color-sidebar-link-hover),1)}.bd-sidebar 
li.has-children>.reference{padding-right:30px}div.doctest>div.highlight span.gp,span.linenos,table.highlighttable td.linenos{user-select:none;-webkit-user-select:text;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none}.docutils.container{padding-left:unset;padding-right:unset} \ No newline at end of file diff --git a/_static/styles/theme.css b/_static/styles/theme.css new file mode 100644 index 000000000..22779d965 --- /dev/null +++ b/_static/styles/theme.css @@ -0,0 +1,134 @@ +/* Provided by Sphinx's 'basic' theme, and included in the final set of assets */ +@import "../basic.css"; + +:root { + /***************************************************************************** + * Theme config + **/ + --pst-header-height: 60px; + + /***************************************************************************** + * Font size + **/ + --pst-font-size-base: 15px; /* base font size - applied at body / html level */ + + /* heading font sizes */ + --pst-font-size-h1: 36px; + --pst-font-size-h2: 32px; + --pst-font-size-h3: 26px; + --pst-font-size-h4: 21px; + --pst-font-size-h5: 18px; + --pst-font-size-h6: 16px; + + /* smaller then heading font sizes*/ + --pst-font-size-milli: 12px; + + --pst-sidebar-font-size: 0.9em; + --pst-sidebar-caption-font-size: 0.9em; + + /***************************************************************************** + * Font family + **/ + /* These are adapted from https://systemfontstack.com/ */ + --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, + "Helvetica Neue", Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, + Segoe UI Symbol; + --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco, + Liberation Mono, Lucida Console, monospace; + + --pst-font-family-base: var(--pst-font-family-base-system); + --pst-font-family-heading: var(--pst-font-family-base); + --pst-font-family-monospace: var(--pst-font-family-monospace-system); + + 
/***************************************************************************** + * Color + * + * Colors are defined in rgb string way, "red, green, blue" + **/ + --pst-color-primary: 19, 6, 84; + --pst-color-success: 40, 167, 69; + --pst-color-info: 0, 123, 255; /*23, 162, 184;*/ + --pst-color-warning: 255, 193, 7; + --pst-color-danger: 220, 53, 69; + --pst-color-text-base: 51, 51, 51; + + --pst-color-h1: var(--pst-color-primary); + --pst-color-h2: var(--pst-color-primary); + --pst-color-h3: var(--pst-color-text-base); + --pst-color-h4: var(--pst-color-text-base); + --pst-color-h5: var(--pst-color-text-base); + --pst-color-h6: var(--pst-color-text-base); + --pst-color-paragraph: var(--pst-color-text-base); + --pst-color-link: 0, 91, 129; + --pst-color-link-hover: 227, 46, 0; + --pst-color-headerlink: 198, 15, 15; + --pst-color-headerlink-hover: 255, 255, 255; + --pst-color-preformatted-text: 34, 34, 34; + --pst-color-preformatted-background: 250, 250, 250; + --pst-color-inline-code: 232, 62, 140; + + --pst-color-active-navigation: 19, 6, 84; + --pst-color-navbar-link: 77, 77, 77; + --pst-color-navbar-link-hover: var(--pst-color-active-navigation); + --pst-color-navbar-link-active: var(--pst-color-active-navigation); + --pst-color-sidebar-link: 77, 77, 77; + --pst-color-sidebar-link-hover: var(--pst-color-active-navigation); + --pst-color-sidebar-link-active: var(--pst-color-active-navigation); + --pst-color-sidebar-expander-background-hover: 244, 244, 244; + --pst-color-sidebar-caption: 77, 77, 77; + --pst-color-toc-link: 119, 117, 122; + --pst-color-toc-link-hover: var(--pst-color-active-navigation); + --pst-color-toc-link-active: var(--pst-color-active-navigation); + + /***************************************************************************** + * Icon + **/ + + /* font awesome icons*/ + --pst-icon-check-circle: "\f058"; + --pst-icon-info-circle: "\f05a"; + --pst-icon-exclamation-triangle: "\f071"; + --pst-icon-exclamation-circle: "\f06a"; + 
--pst-icon-times-circle: "\f057"; + --pst-icon-lightbulb: "\f0eb"; + + /***************************************************************************** + * Admonitions + **/ + + --pst-color-admonition-default: var(--pst-color-info); + --pst-color-admonition-note: var(--pst-color-info); + --pst-color-admonition-attention: var(--pst-color-warning); + --pst-color-admonition-caution: var(--pst-color-warning); + --pst-color-admonition-warning: var(--pst-color-warning); + --pst-color-admonition-danger: var(--pst-color-danger); + --pst-color-admonition-error: var(--pst-color-danger); + --pst-color-admonition-hint: var(--pst-color-success); + --pst-color-admonition-tip: var(--pst-color-success); + --pst-color-admonition-important: var(--pst-color-success); + + --pst-icon-admonition-default: var(--pst-icon-info-circle); + --pst-icon-admonition-note: var(--pst-icon-info-circle); + --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle); + --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle); + --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle); + --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle); + --pst-icon-admonition-error: var(--pst-icon-times-circle); + --pst-icon-admonition-hint: var(--pst-icon-lightbulb); + --pst-icon-admonition-tip: var(--pst-icon-lightbulb); + --pst-icon-admonition-important: var(--pst-icon-exclamation-circle); + + /***************************************************************************** + * versionmodified + **/ + + --pst-color-versionmodified-default: var(--pst-color-info); + --pst-color-versionmodified-added: var(--pst-color-success); + --pst-color-versionmodified-changed: var(--pst-color-warning); + --pst-color-versionmodified-deprecated: var(--pst-color-danger); + + --pst-icon-versionmodified-default: var(--pst-icon-exclamation-circle); + --pst-icon-versionmodified-added: var(--pst-icon-exclamation-circle); + --pst-icon-versionmodified-changed: var(--pst-icon-exclamation-circle); + 
--pst-icon-versionmodified-deprecated: var(--pst-icon-exclamation-circle); +} diff --git a/docs/source/_static/theme_overrides.css b/_static/theme_overrides.css similarity index 100% rename from docs/source/_static/theme_overrides.css rename to _static/theme_overrides.css diff --git a/_static/vendor/fontawesome/5.13.0/LICENSE.txt b/_static/vendor/fontawesome/5.13.0/LICENSE.txt new file mode 100644 index 000000000..f31bef92b --- /dev/null +++ b/_static/vendor/fontawesome/5.13.0/LICENSE.txt @@ -0,0 +1,34 @@ +Font Awesome Free License +------------------------- + +Font Awesome Free is free, open source, and GPL friendly. You can use it for +commercial projects, open source projects, or really almost whatever you want. +Full Font Awesome Free license: https://fontawesome.com/license/free. + +# Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/) +In the Font Awesome Free download, the CC BY 4.0 license applies to all icons +packaged as SVG and JS file types. + +# Fonts: SIL OFL 1.1 License (https://scripts.sil.org/OFL) +In the Font Awesome Free download, the SIL OFL license applies to all icons +packaged as web and desktop font files. + +# Code: MIT License (https://opensource.org/licenses/MIT) +In the Font Awesome Free download, the MIT license applies to all non-font and +non-icon files. + +# Attribution +Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font +Awesome Free files already contain embedded comments with sufficient +attribution, so you shouldn't need to do anything additional when using these +files normally. + +We've kept attribution comments terse, so we ask that you do not actively work +to remove them from files, especially code. They're a great way for folks to +learn about Font Awesome. + +# Brand Icons +All brand icons are trademarks of their respective owners. The use of these +trademarks does not indicate endorsement of the trademark holder by Font +Awesome, nor vice versa. 
**Please do not use brand logos for any purpose except +to represent the company, product, or service to which they refer.** diff --git a/_static/vendor/fontawesome/5.13.0/css/all.min.css b/_static/vendor/fontawesome/5.13.0/css/all.min.css new file mode 100644 index 000000000..3d28ab203 --- /dev/null +++ b/_static/vendor/fontawesome/5.13.0/css/all.min.css @@ -0,0 +1,5 @@ +/*! + * Font Awesome Free 5.13.0 by @fontawesome - https://fontawesome.com + * License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) + */ +.fa,.fab,.fad,.fal,.far,.fas{-moz-osx-font-smoothing:grayscale;-webkit-font-smoothing:antialiased;display:inline-block;font-style:normal;font-variant:normal;text-rendering:auto;line-height:1}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-.0667em}.fa-xs{font-size:.75em}.fa-sm{font-size:.875em}.fa-1x{font-size:1em}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-6x{font-size:6em}.fa-7x{font-size:7em}.fa-8x{font-size:8em}.fa-9x{font-size:9em}.fa-10x{font-size:10em}.fa-fw{text-align:center;width:1.25em}.fa-ul{list-style-type:none;margin-left:2.5em;padding-left:0}.fa-ul>li{position:relative}.fa-li{left:-2em;position:absolute;text-align:center;width:2em;line-height:inherit}.fa-border{border:.08em solid #eee;border-radius:.1em;padding:.2em .25em .15em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left,.fab.fa-pull-left,.fal.fa-pull-left,.far.fa-pull-left,.fas.fa-pull-left{margin-right:.3em}.fa.fa-pull-right,.fab.fa-pull-right,.fal.fa-pull-right,.far.fa-pull-right,.fas.fa-pull-right{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(1turn);transform:rotate(1turn)}}@keyframes 
fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(1turn);transform:rotate(1turn)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-webkit-transform:scaleY(-1);transform:scaleY(-1)}.fa-flip-both,.fa-flip-horizontal.fa-flip-vertical,.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)"}.fa-flip-both,.fa-flip-horizontal.fa-flip-vertical{-webkit-transform:scale(-1);transform:scale(-1)}:root .fa-flip-both,:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root 
.fa-rotate-270{-webkit-filter:none;filter:none}.fa-stack{display:inline-block;height:2em;line-height:2em;position:relative;vertical-align:middle;width:2.5em}.fa-stack-1x,.fa-stack-2x{left:0;position:absolute;text-align:center;width:100%}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-500px:before{content:"\f26e"}.fa-accessible-icon:before{content:"\f368"}.fa-accusoft:before{content:"\f369"}.fa-acquisitions-incorporated:before{content:"\f6af"}.fa-ad:before{content:"\f641"}.fa-address-book:before{content:"\f2b9"}.fa-address-card:before{content:"\f2bb"}.fa-adjust:before{content:"\f042"}.fa-adn:before{content:"\f170"}.fa-adobe:before{content:"\f778"}.fa-adversal:before{content:"\f36a"}.fa-affiliatetheme:before{content:"\f36b"}.fa-air-freshener:before{content:"\f5d0"}.fa-airbnb:before{content:"\f834"}.fa-algolia:before{content:"\f36c"}.fa-align-center:before{content:"\f037"}.fa-align-justify:before{content:"\f039"}.fa-align-left:before{content:"\f036"}.fa-align-right:before{content:"\f038"}.fa-alipay:before{content:"\f642"}.fa-allergies:before{content:"\f461"}.fa-amazon:before{content:"\f270"}.fa-amazon-pay:before{content:"\f42c"}.fa-ambulance:before{content:"\f0f9"}.fa-american-sign-language-interpreting:before{content:"\f2a3"}.fa-amilia:before{content:"\f36d"}.fa-anchor:before{content:"\f13d"}.fa-android:before{content:"\f17b"}.fa-angellist:before{content:"\f209"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-down:before{content:"\f107"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angry:before{content:"\f556"}.fa-angrycreative:before{content:"\f36e"}.fa-angular:before{content:"\f420"}.fa-ankh:before{content:"\f644"}.fa-app-store:before{content:"\f36f"}.fa-app-store-ios:before{content:"\f370"}.fa-apper:before{content:"\f3
71"}.fa-apple:before{content:"\f179"}.fa-apple-alt:before{content:"\f5d1"}.fa-apple-pay:before{content:"\f415"}.fa-archive:before{content:"\f187"}.fa-archway:before{content:"\f557"}.fa-arrow-alt-circle-down:before{content:"\f358"}.fa-arrow-alt-circle-left:before{content:"\f359"}.fa-arrow-alt-circle-right:before{content:"\f35a"}.fa-arrow-alt-circle-up:before{content:"\f35b"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-down:before{content:"\f063"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrows-alt:before{content:"\f0b2"}.fa-arrows-alt-h:before{content:"\f337"}.fa-arrows-alt-v:before{content:"\f338"}.fa-artstation:before{content:"\f77a"}.fa-assistive-listening-systems:before{content:"\f2a2"}.fa-asterisk:before{content:"\f069"}.fa-asymmetrik:before{content:"\f372"}.fa-at:before{content:"\f1fa"}.fa-atlas:before{content:"\f558"}.fa-atlassian:before{content:"\f77b"}.fa-atom:before{content:"\f5d2"}.fa-audible:before{content:"\f373"}.fa-audio-description:before{content:"\f29e"}.fa-autoprefixer:before{content:"\f41c"}.fa-avianex:before{content:"\f374"}.fa-aviato:before{content:"\f421"}.fa-award:before{content:"\f559"}.fa-aws:before{content:"\f375"}.fa-baby:before{content:"\f77c"}.fa-baby-carriage:before{content:"\f77d"}.fa-backspace:before{content:"\f55a"}.fa-backward:before{content:"\f04a"}.fa-bacon:before{content:"\f7e5"}.fa-bahai:before{content:"\f666"}.fa-balance-scale:before{content:"\f24e"}.fa-balance-scale-left:before{content:"\f515"}.fa-balance-scale-right:before{content:"\f516"}.fa-ban:before{content:"\f05e"}.fa-band-aid:before{content:"\f462"}.fa-bandcamp:before{content:"\f2d5"}.fa-barcode:before{content:"\f02a"}.fa-bars:before{content:"\f0c9"}.fa-baseball-ball:before{content:"\f433"}.fa-basketball-ball:before{content:"\f434"}.fa-bath:before{content:
"\f2cd"}.fa-battery-empty:before{content:"\f244"}.fa-battery-full:before{content:"\f240"}.fa-battery-half:before{content:"\f242"}.fa-battery-quarter:before{content:"\f243"}.fa-battery-three-quarters:before{content:"\f241"}.fa-battle-net:before{content:"\f835"}.fa-bed:before{content:"\f236"}.fa-beer:before{content:"\f0fc"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-bell:before{content:"\f0f3"}.fa-bell-slash:before{content:"\f1f6"}.fa-bezier-curve:before{content:"\f55b"}.fa-bible:before{content:"\f647"}.fa-bicycle:before{content:"\f206"}.fa-biking:before{content:"\f84a"}.fa-bimobject:before{content:"\f378"}.fa-binoculars:before{content:"\f1e5"}.fa-biohazard:before{content:"\f780"}.fa-birthday-cake:before{content:"\f1fd"}.fa-bitbucket:before{content:"\f171"}.fa-bitcoin:before{content:"\f379"}.fa-bity:before{content:"\f37a"}.fa-black-tie:before{content:"\f27e"}.fa-blackberry:before{content:"\f37b"}.fa-blender:before{content:"\f517"}.fa-blender-phone:before{content:"\f6b6"}.fa-blind:before{content:"\f29d"}.fa-blog:before{content:"\f781"}.fa-blogger:before{content:"\f37c"}.fa-blogger-b:before{content:"\f37d"}.fa-bluetooth:before{content:"\f293"}.fa-bluetooth-b:before{content:"\f294"}.fa-bold:before{content:"\f032"}.fa-bolt:before{content:"\f0e7"}.fa-bomb:before{content:"\f1e2"}.fa-bone:before{content:"\f5d7"}.fa-bong:before{content:"\f55c"}.fa-book:before{content:"\f02d"}.fa-book-dead:before{content:"\f6b7"}.fa-book-medical:before{content:"\f7e6"}.fa-book-open:before{content:"\f518"}.fa-book-reader:before{content:"\f5da"}.fa-bookmark:before{content:"\f02e"}.fa-bootstrap:before{content:"\f836"}.fa-border-all:before{content:"\f84c"}.fa-border-none:before{content:"\f850"}.fa-border-style:before{content:"\f853"}.fa-bowling-ball:before{content:"\f436"}.fa-box:before{content:"\f466"}.fa-box-open:before{content:"\f49e"}.fa-box-tissue:before{content:"\f95b"}.fa-boxes:before{content:"\f468"}.fa-braille:before{content:"\f2a1"}.fa-brain:before{co
ntent:"\f5dc"}.fa-bread-slice:before{content:"\f7ec"}.fa-briefcase:before{content:"\f0b1"}.fa-briefcase-medical:before{content:"\f469"}.fa-broadcast-tower:before{content:"\f519"}.fa-broom:before{content:"\f51a"}.fa-brush:before{content:"\f55d"}.fa-btc:before{content:"\f15a"}.fa-buffer:before{content:"\f837"}.fa-bug:before{content:"\f188"}.fa-building:before{content:"\f1ad"}.fa-bullhorn:before{content:"\f0a1"}.fa-bullseye:before{content:"\f140"}.fa-burn:before{content:"\f46a"}.fa-buromobelexperte:before{content:"\f37f"}.fa-bus:before{content:"\f207"}.fa-bus-alt:before{content:"\f55e"}.fa-business-time:before{content:"\f64a"}.fa-buy-n-large:before{content:"\f8a6"}.fa-buysellads:before{content:"\f20d"}.fa-calculator:before{content:"\f1ec"}.fa-calendar:before{content:"\f133"}.fa-calendar-alt:before{content:"\f073"}.fa-calendar-check:before{content:"\f274"}.fa-calendar-day:before{content:"\f783"}.fa-calendar-minus:before{content:"\f272"}.fa-calendar-plus:before{content:"\f271"}.fa-calendar-times:before{content:"\f273"}.fa-calendar-week:before{content:"\f784"}.fa-camera:before{content:"\f030"}.fa-camera-retro:before{content:"\f083"}.fa-campground:before{content:"\f6bb"}.fa-canadian-maple-leaf:before{content:"\f785"}.fa-candy-cane:before{content:"\f786"}.fa-cannabis:before{content:"\f55f"}.fa-capsules:before{content:"\f46b"}.fa-car:before{content:"\f1b9"}.fa-car-alt:before{content:"\f5de"}.fa-car-battery:before{content:"\f5df"}.fa-car-crash:before{content:"\f5e1"}.fa-car-side:before{content:"\f5e4"}.fa-caravan:before{content:"\f8ff"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-caret-square-down:before{content:"\f150"}.fa-caret-square-left:before{content:"\f191"}.fa-caret-square-right:before{content:"\f152"}.fa-caret-square-up:before{content:"\f151"}.fa-caret-up:before{content:"\f0d8"}.fa-carrot:before{content:"\f787"}.fa-cart-arrow-down:before{content:"\f218"}.fa-cart-plus:before{content:"\f217"}.fa-
cash-register:before{content:"\f788"}.fa-cat:before{content:"\f6be"}.fa-cc-amazon-pay:before{content:"\f42d"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-apple-pay:before{content:"\f416"}.fa-cc-diners-club:before{content:"\f24c"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-cc-visa:before{content:"\f1f0"}.fa-centercode:before{content:"\f380"}.fa-centos:before{content:"\f789"}.fa-certificate:before{content:"\f0a3"}.fa-chair:before{content:"\f6c0"}.fa-chalkboard:before{content:"\f51b"}.fa-chalkboard-teacher:before{content:"\f51c"}.fa-charging-station:before{content:"\f5e7"}.fa-chart-area:before{content:"\f1fe"}.fa-chart-bar:before{content:"\f080"}.fa-chart-line:before{content:"\f201"}.fa-chart-pie:before{content:"\f200"}.fa-check:before{content:"\f00c"}.fa-check-circle:before{content:"\f058"}.fa-check-double:before{content:"\f560"}.fa-check-square:before{content:"\f14a"}.fa-cheese:before{content:"\f7ef"}.fa-chess:before{content:"\f439"}.fa-chess-bishop:before{content:"\f43a"}.fa-chess-board:before{content:"\f43c"}.fa-chess-king:before{content:"\f43f"}.fa-chess-knight:before{content:"\f441"}.fa-chess-pawn:before{content:"\f443"}.fa-chess-queen:before{content:"\f445"}.fa-chess-rook:before{content:"\f447"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-down:before{content:"\f078"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-chevron-up:before{content:"\f077"}.fa-child:before{content:"\f1ae"}.fa-chrome:before{content:"\f268"}.fa-chromecast:before{content:"\f838"}.fa-church:before{content:"\f51d"}.fa-circle:before{content:"\f111"}.fa-circle-notch:before{content:"\f1ce"}.fa-city:before{content:"\f64f"}.fa-clinic-medical:before{content:"\f7f2"}.fa-
clipboard:before{content:"\f328"}.fa-clipboard-check:before{content:"\f46c"}.fa-clipboard-list:before{content:"\f46d"}.fa-clock:before{content:"\f017"}.fa-clone:before{content:"\f24d"}.fa-closed-captioning:before{content:"\f20a"}.fa-cloud:before{content:"\f0c2"}.fa-cloud-download-alt:before{content:"\f381"}.fa-cloud-meatball:before{content:"\f73b"}.fa-cloud-moon:before{content:"\f6c3"}.fa-cloud-moon-rain:before{content:"\f73c"}.fa-cloud-rain:before{content:"\f73d"}.fa-cloud-showers-heavy:before{content:"\f740"}.fa-cloud-sun:before{content:"\f6c4"}.fa-cloud-sun-rain:before{content:"\f743"}.fa-cloud-upload-alt:before{content:"\f382"}.fa-cloudscale:before{content:"\f383"}.fa-cloudsmith:before{content:"\f384"}.fa-cloudversify:before{content:"\f385"}.fa-cocktail:before{content:"\f561"}.fa-code:before{content:"\f121"}.fa-code-branch:before{content:"\f126"}.fa-codepen:before{content:"\f1cb"}.fa-codiepie:before{content:"\f284"}.fa-coffee:before{content:"\f0f4"}.fa-cog:before{content:"\f013"}.fa-cogs:before{content:"\f085"}.fa-coins:before{content:"\f51e"}.fa-columns:before{content:"\f0db"}.fa-comment:before{content:"\f075"}.fa-comment-alt:before{content:"\f27a"}.fa-comment-dollar:before{content:"\f651"}.fa-comment-dots:before{content:"\f4ad"}.fa-comment-medical:before{content:"\f7f5"}.fa-comment-slash:before{content:"\f4b3"}.fa-comments:before{content:"\f086"}.fa-comments-dollar:before{content:"\f653"}.fa-compact-disc:before{content:"\f51f"}.fa-compass:before{content:"\f14e"}.fa-compress:before{content:"\f066"}.fa-compress-alt:before{content:"\f422"}.fa-compress-arrows-alt:before{content:"\f78c"}.fa-concierge-bell:before{content:"\f562"}.fa-confluence:before{content:"\f78d"}.fa-connectdevelop:before{content:"\f20e"}.fa-contao:before{content:"\f26d"}.fa-cookie:before{content:"\f563"}.fa-cookie-bite:before{content:"\f564"}.fa-copy:before{content:"\f0c5"}.fa-copyright:before{content:"\f1f9"}.fa-cotton-bureau:before{content:"\f89e"}.fa-couch:before{content:"\f4b8"}.fa-cpanel:be
fore{content:"\f388"}.fa-creative-commons:before{content:"\f25e"}.fa-creative-commons-by:before{content:"\f4e7"}.fa-creative-commons-nc:before{content:"\f4e8"}.fa-creative-commons-nc-eu:before{content:"\f4e9"}.fa-creative-commons-nc-jp:before{content:"\f4ea"}.fa-creative-commons-nd:before{content:"\f4eb"}.fa-creative-commons-pd:before{content:"\f4ec"}.fa-creative-commons-pd-alt:before{content:"\f4ed"}.fa-creative-commons-remix:before{content:"\f4ee"}.fa-creative-commons-sa:before{content:"\f4ef"}.fa-creative-commons-sampling:before{content:"\f4f0"}.fa-creative-commons-sampling-plus:before{content:"\f4f1"}.fa-creative-commons-share:before{content:"\f4f2"}.fa-creative-commons-zero:before{content:"\f4f3"}.fa-credit-card:before{content:"\f09d"}.fa-critical-role:before{content:"\f6c9"}.fa-crop:before{content:"\f125"}.fa-crop-alt:before{content:"\f565"}.fa-cross:before{content:"\f654"}.fa-crosshairs:before{content:"\f05b"}.fa-crow:before{content:"\f520"}.fa-crown:before{content:"\f521"}.fa-crutch:before{content:"\f7f7"}.fa-css3:before{content:"\f13c"}.fa-css3-alt:before{content:"\f38b"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-cut:before{content:"\f0c4"}.fa-cuttlefish:before{content:"\f38c"}.fa-d-and-d:before{content:"\f38d"}.fa-d-and-d-beyond:before{content:"\f6ca"}.fa-dailymotion:before{content:"\f952"}.fa-dashcube:before{content:"\f210"}.fa-database:before{content:"\f1c0"}.fa-deaf:before{content:"\f2a4"}.fa-delicious:before{content:"\f1a5"}.fa-democrat:before{content:"\f747"}.fa-deploydog:before{content:"\f38e"}.fa-deskpro:before{content:"\f38f"}.fa-desktop:before{content:"\f108"}.fa-dev:before{content:"\f6cc"}.fa-deviantart:before{content:"\f1bd"}.fa-dharmachakra:before{content:"\f655"}.fa-dhl:before{content:"\f790"}.fa-diagnoses:before{content:"\f470"}.fa-diaspora:before{content:"\f791"}.fa-dice:before{content:"\f522"}.fa-dice-d20:before{content:"\f6cf"}.fa-dice-d6:before{content:"\f6d1"}.fa-dice-five:before{content:"\f523"}.fa-dice-four:be
fore{content:"\f524"}.fa-dice-one:before{content:"\f525"}.fa-dice-six:before{content:"\f526"}.fa-dice-three:before{content:"\f527"}.fa-dice-two:before{content:"\f528"}.fa-digg:before{content:"\f1a6"}.fa-digital-ocean:before{content:"\f391"}.fa-digital-tachograph:before{content:"\f566"}.fa-directions:before{content:"\f5eb"}.fa-discord:before{content:"\f392"}.fa-discourse:before{content:"\f393"}.fa-disease:before{content:"\f7fa"}.fa-divide:before{content:"\f529"}.fa-dizzy:before{content:"\f567"}.fa-dna:before{content:"\f471"}.fa-dochub:before{content:"\f394"}.fa-docker:before{content:"\f395"}.fa-dog:before{content:"\f6d3"}.fa-dollar-sign:before{content:"\f155"}.fa-dolly:before{content:"\f472"}.fa-dolly-flatbed:before{content:"\f474"}.fa-donate:before{content:"\f4b9"}.fa-door-closed:before{content:"\f52a"}.fa-door-open:before{content:"\f52b"}.fa-dot-circle:before{content:"\f192"}.fa-dove:before{content:"\f4ba"}.fa-download:before{content:"\f019"}.fa-draft2digital:before{content:"\f396"}.fa-drafting-compass:before{content:"\f568"}.fa-dragon:before{content:"\f6d5"}.fa-draw-polygon:before{content:"\f5ee"}.fa-dribbble:before{content:"\f17d"}.fa-dribbble-square:before{content:"\f397"}.fa-dropbox:before{content:"\f16b"}.fa-drum:before{content:"\f569"}.fa-drum-steelpan:before{content:"\f56a"}.fa-drumstick-bite:before{content:"\f6d7"}.fa-drupal:before{content:"\f1a9"}.fa-dumbbell:before{content:"\f44b"}.fa-dumpster:before{content:"\f793"}.fa-dumpster-fire:before{content:"\f794"}.fa-dungeon:before{content:"\f6d9"}.fa-dyalog:before{content:"\f399"}.fa-earlybirds:before{content:"\f39a"}.fa-ebay:before{content:"\f4f4"}.fa-edge:before{content:"\f282"}.fa-edit:before{content:"\f044"}.fa-egg:before{content:"\f7fb"}.fa-eject:before{content:"\f052"}.fa-elementor:before{content:"\f430"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-ello:before{content:"\f5f1"}.fa-ember:before{content:"\f423"}.fa-empire:before{content:"\f1d1"}.fa-envelope:before{content:"
\f0e0"}.fa-envelope-open:before{content:"\f2b6"}.fa-envelope-open-text:before{content:"\f658"}.fa-envelope-square:before{content:"\f199"}.fa-envira:before{content:"\f299"}.fa-equals:before{content:"\f52c"}.fa-eraser:before{content:"\f12d"}.fa-erlang:before{content:"\f39d"}.fa-ethereum:before{content:"\f42e"}.fa-ethernet:before{content:"\f796"}.fa-etsy:before{content:"\f2d7"}.fa-euro-sign:before{content:"\f153"}.fa-evernote:before{content:"\f839"}.fa-exchange-alt:before{content:"\f362"}.fa-exclamation:before{content:"\f12a"}.fa-exclamation-circle:before{content:"\f06a"}.fa-exclamation-triangle:before{content:"\f071"}.fa-expand:before{content:"\f065"}.fa-expand-alt:before{content:"\f424"}.fa-expand-arrows-alt:before{content:"\f31e"}.fa-expeditedssl:before{content:"\f23e"}.fa-external-link-alt:before{content:"\f35d"}.fa-external-link-square-alt:before{content:"\f360"}.fa-eye:before{content:"\f06e"}.fa-eye-dropper:before{content:"\f1fb"}.fa-eye-slash:before{content:"\f070"}.fa-facebook:before{content:"\f09a"}.fa-facebook-f:before{content:"\f39e"}.fa-facebook-messenger:before{content:"\f39f"}.fa-facebook-square:before{content:"\f082"}.fa-fan:before{content:"\f863"}.fa-fantasy-flight-games:before{content:"\f6dc"}.fa-fast-backward:before{content:"\f049"}.fa-fast-forward:before{content:"\f050"}.fa-faucet:before{content:"\f905"}.fa-fax:before{content:"\f1ac"}.fa-feather:before{content:"\f52d"}.fa-feather-alt:before{content:"\f56b"}.fa-fedex:before{content:"\f797"}.fa-fedora:before{content:"\f798"}.fa-female:before{content:"\f182"}.fa-fighter-jet:before{content:"\f0fb"}.fa-figma:before{content:"\f799"}.fa-file:before{content:"\f15b"}.fa-file-alt:before{content:"\f15c"}.fa-file-archive:before{content:"\f1c6"}.fa-file-audio:before{content:"\f1c7"}.fa-file-code:before{content:"\f1c9"}.fa-file-contract:before{content:"\f56c"}.fa-file-csv:before{content:"\f6dd"}.fa-file-download:before{content:"\f56d"}.fa-file-excel:before{content:"\f1c3"}.fa-file-export:before{content:"\f56e"}.fa
-file-image:before{content:"\f1c5"}.fa-file-import:before{content:"\f56f"}.fa-file-invoice:before{content:"\f570"}.fa-file-invoice-dollar:before{content:"\f571"}.fa-file-medical:before{content:"\f477"}.fa-file-medical-alt:before{content:"\f478"}.fa-file-pdf:before{content:"\f1c1"}.fa-file-powerpoint:before{content:"\f1c4"}.fa-file-prescription:before{content:"\f572"}.fa-file-signature:before{content:"\f573"}.fa-file-upload:before{content:"\f574"}.fa-file-video:before{content:"\f1c8"}.fa-file-word:before{content:"\f1c2"}.fa-fill:before{content:"\f575"}.fa-fill-drip:before{content:"\f576"}.fa-film:before{content:"\f008"}.fa-filter:before{content:"\f0b0"}.fa-fingerprint:before{content:"\f577"}.fa-fire:before{content:"\f06d"}.fa-fire-alt:before{content:"\f7e4"}.fa-fire-extinguisher:before{content:"\f134"}.fa-firefox:before{content:"\f269"}.fa-firefox-browser:before{content:"\f907"}.fa-first-aid:before{content:"\f479"}.fa-first-order:before{content:"\f2b0"}.fa-first-order-alt:before{content:"\f50a"}.fa-firstdraft:before{content:"\f3a1"}.fa-fish:before{content:"\f578"}.fa-fist-raised:before{content:"\f6de"}.fa-flag:before{content:"\f024"}.fa-flag-checkered:before{content:"\f11e"}.fa-flag-usa:before{content:"\f74d"}.fa-flask:before{content:"\f0c3"}.fa-flickr:before{content:"\f16e"}.fa-flipboard:before{content:"\f44d"}.fa-flushed:before{content:"\f579"}.fa-fly:before{content:"\f417"}.fa-folder:before{content:"\f07b"}.fa-folder-minus:before{content:"\f65d"}.fa-folder-open:before{content:"\f07c"}.fa-folder-plus:before{content:"\f65e"}.fa-font:before{content:"\f031"}.fa-font-awesome:before{content:"\f2b4"}.fa-font-awesome-alt:before{content:"\f35c"}.fa-font-awesome-flag:before{content:"\f425"}.fa-font-awesome-logo-full:before{content:"\f4e6"}.fa-fonticons:before{content:"\f280"}.fa-fonticons-fi:before{content:"\f3a2"}.fa-football-ball:before{content:"\f44e"}.fa-fort-awesome:before{content:"\f286"}.fa-fort-awesome-alt:before{content:"\f3a3"}.fa-forumbee:before{content:"\f211"}.
fa-forward:before{content:"\f04e"}.fa-foursquare:before{content:"\f180"}.fa-free-code-camp:before{content:"\f2c5"}.fa-freebsd:before{content:"\f3a4"}.fa-frog:before{content:"\f52e"}.fa-frown:before{content:"\f119"}.fa-frown-open:before{content:"\f57a"}.fa-fulcrum:before{content:"\f50b"}.fa-funnel-dollar:before{content:"\f662"}.fa-futbol:before{content:"\f1e3"}.fa-galactic-republic:before{content:"\f50c"}.fa-galactic-senate:before{content:"\f50d"}.fa-gamepad:before{content:"\f11b"}.fa-gas-pump:before{content:"\f52f"}.fa-gavel:before{content:"\f0e3"}.fa-gem:before{content:"\f3a5"}.fa-genderless:before{content:"\f22d"}.fa-get-pocket:before{content:"\f265"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-ghost:before{content:"\f6e2"}.fa-gift:before{content:"\f06b"}.fa-gifts:before{content:"\f79c"}.fa-git:before{content:"\f1d3"}.fa-git-alt:before{content:"\f841"}.fa-git-square:before{content:"\f1d2"}.fa-github:before{content:"\f09b"}.fa-github-alt:before{content:"\f113"}.fa-github-square:before{content:"\f092"}.fa-gitkraken:before{content:"\f3a6"}.fa-gitlab:before{content:"\f296"}.fa-gitter:before{content:"\f426"}.fa-glass-cheers:before{content:"\f79f"}.fa-glass-martini:before{content:"\f000"}.fa-glass-martini-alt:before{content:"\f57b"}.fa-glass-whiskey:before{content:"\f7a0"}.fa-glasses:before{content:"\f530"}.fa-glide:before{content:"\f2a5"}.fa-glide-g:before{content:"\f2a6"}.fa-globe:before{content:"\f0ac"}.fa-globe-africa:before{content:"\f57c"}.fa-globe-americas:before{content:"\f57d"}.fa-globe-asia:before{content:"\f57e"}.fa-globe-europe:before{content:"\f7a2"}.fa-gofore:before{content:"\f3a7"}.fa-golf-ball:before{content:"\f450"}.fa-goodreads:before{content:"\f3a8"}.fa-goodreads-g:before{content:"\f3a9"}.fa-google:before{content:"\f1a0"}.fa-google-drive:before{content:"\f3aa"}.fa-google-play:before{content:"\f3ab"}.fa-google-plus:before{content:"\f2b3"}.fa-google-plus-g:before{content:"\f0d5"}.fa-google-plus-square:before{content:"\f0d4"}.fa
-google-wallet:before{content:"\f1ee"}.fa-gopuram:before{content:"\f664"}.fa-graduation-cap:before{content:"\f19d"}.fa-gratipay:before{content:"\f184"}.fa-grav:before{content:"\f2d6"}.fa-greater-than:before{content:"\f531"}.fa-greater-than-equal:before{content:"\f532"}.fa-grimace:before{content:"\f57f"}.fa-grin:before{content:"\f580"}.fa-grin-alt:before{content:"\f581"}.fa-grin-beam:before{content:"\f582"}.fa-grin-beam-sweat:before{content:"\f583"}.fa-grin-hearts:before{content:"\f584"}.fa-grin-squint:before{content:"\f585"}.fa-grin-squint-tears:before{content:"\f586"}.fa-grin-stars:before{content:"\f587"}.fa-grin-tears:before{content:"\f588"}.fa-grin-tongue:before{content:"\f589"}.fa-grin-tongue-squint:before{content:"\f58a"}.fa-grin-tongue-wink:before{content:"\f58b"}.fa-grin-wink:before{content:"\f58c"}.fa-grip-horizontal:before{content:"\f58d"}.fa-grip-lines:before{content:"\f7a4"}.fa-grip-lines-vertical:before{content:"\f7a5"}.fa-grip-vertical:before{content:"\f58e"}.fa-gripfire:before{content:"\f3ac"}.fa-grunt:before{content:"\f3ad"}.fa-guitar:before{content:"\f7a6"}.fa-gulp:before{content:"\f3ae"}.fa-h-square:before{content:"\f0fd"}.fa-hacker-news:before{content:"\f1d4"}.fa-hacker-news-square:before{content:"\f3af"}.fa-hackerrank:before{content:"\f5f7"}.fa-hamburger:before{content:"\f805"}.fa-hammer:before{content:"\f6e3"}.fa-hamsa:before{content:"\f665"}.fa-hand-holding:before{content:"\f4bd"}.fa-hand-holding-heart:before{content:"\f4be"}.fa-hand-holding-medical:before{content:"\f95c"}.fa-hand-holding-usd:before{content:"\f4c0"}.fa-hand-holding-water:before{content:"\f4c1"}.fa-hand-lizard:before{content:"\f258"}.fa-hand-middle-finger:before{content:"\f806"}.fa-hand-paper:before{content:"\f256"}.fa-hand-peace:before{content:"\f25b"}.fa-hand-point-down:before{content:"\f0a7"}.fa-hand-point-left:before{content:"\f0a5"}.fa-hand-point-right:before{content:"\f0a4"}.fa-hand-point-up:before{content:"\f0a6"}.fa-hand-pointer:before{content:"\f25a"}.fa-hand-rock:before
{content:"\f255"}.fa-hand-scissors:before{content:"\f257"}.fa-hand-sparkles:before{content:"\f95d"}.fa-hand-spock:before{content:"\f259"}.fa-hands:before{content:"\f4c2"}.fa-hands-helping:before{content:"\f4c4"}.fa-hands-wash:before{content:"\f95e"}.fa-handshake:before{content:"\f2b5"}.fa-handshake-alt-slash:before{content:"\f95f"}.fa-handshake-slash:before{content:"\f960"}.fa-hanukiah:before{content:"\f6e6"}.fa-hard-hat:before{content:"\f807"}.fa-hashtag:before{content:"\f292"}.fa-hat-cowboy:before{content:"\f8c0"}.fa-hat-cowboy-side:before{content:"\f8c1"}.fa-hat-wizard:before{content:"\f6e8"}.fa-hdd:before{content:"\f0a0"}.fa-head-side-cough:before{content:"\f961"}.fa-head-side-cough-slash:before{content:"\f962"}.fa-head-side-mask:before{content:"\f963"}.fa-head-side-virus:before{content:"\f964"}.fa-heading:before{content:"\f1dc"}.fa-headphones:before{content:"\f025"}.fa-headphones-alt:before{content:"\f58f"}.fa-headset:before{content:"\f590"}.fa-heart:before{content:"\f004"}.fa-heart-broken:before{content:"\f7a9"}.fa-heartbeat:before{content:"\f21e"}.fa-helicopter:before{content:"\f533"}.fa-highlighter:before{content:"\f591"}.fa-hiking:before{content:"\f6ec"}.fa-hippo:before{content:"\f6ed"}.fa-hips:before{content:"\f452"}.fa-hire-a-helper:before{content:"\f3b0"}.fa-history:before{content:"\f1da"}.fa-hockey-puck:before{content:"\f453"}.fa-holly-berry:before{content:"\f7aa"}.fa-home:before{content:"\f015"}.fa-hooli:before{content:"\f427"}.fa-hornbill:before{content:"\f592"}.fa-horse:before{content:"\f6f0"}.fa-horse-head:before{content:"\f7ab"}.fa-hospital:before{content:"\f0f8"}.fa-hospital-alt:before{content:"\f47d"}.fa-hospital-symbol:before{content:"\f47e"}.fa-hospital-user:before{content:"\f80d"}.fa-hot-tub:before{content:"\f593"}.fa-hotdog:before{content:"\f80f"}.fa-hotel:before{content:"\f594"}.fa-hotjar:before{content:"\f3b1"}.fa-hourglass:before{content:"\f254"}.fa-hourglass-end:before{content:"\f253"}.fa-hourglass-half:before{content:"\f252"}.fa-hourglas
s-start:before{content:"\f251"}.fa-house-damage:before{content:"\f6f1"}.fa-house-user:before{content:"\f965"}.fa-houzz:before{content:"\f27c"}.fa-hryvnia:before{content:"\f6f2"}.fa-html5:before{content:"\f13b"}.fa-hubspot:before{content:"\f3b2"}.fa-i-cursor:before{content:"\f246"}.fa-ice-cream:before{content:"\f810"}.fa-icicles:before{content:"\f7ad"}.fa-icons:before{content:"\f86d"}.fa-id-badge:before{content:"\f2c1"}.fa-id-card:before{content:"\f2c2"}.fa-id-card-alt:before{content:"\f47f"}.fa-ideal:before{content:"\f913"}.fa-igloo:before{content:"\f7ae"}.fa-image:before{content:"\f03e"}.fa-images:before{content:"\f302"}.fa-imdb:before{content:"\f2d8"}.fa-inbox:before{content:"\f01c"}.fa-indent:before{content:"\f03c"}.fa-industry:before{content:"\f275"}.fa-infinity:before{content:"\f534"}.fa-info:before{content:"\f129"}.fa-info-circle:before{content:"\f05a"}.fa-instagram:before{content:"\f16d"}.fa-instagram-square:before{content:"\f955"}.fa-intercom:before{content:"\f7af"}.fa-internet-explorer:before{content:"\f26b"}.fa-invision:before{content:"\f7b0"}.fa-ioxhost:before{content:"\f208"}.fa-italic:before{content:"\f033"}.fa-itch-io:before{content:"\f83a"}.fa-itunes:before{content:"\f3b4"}.fa-itunes-note:before{content:"\f3b5"}.fa-java:before{content:"\f4e4"}.fa-jedi:before{content:"\f669"}.fa-jedi-order:before{content:"\f50e"}.fa-jenkins:before{content:"\f3b6"}.fa-jira:before{content:"\f7b1"}.fa-joget:before{content:"\f3b7"}.fa-joint:before{content:"\f595"}.fa-joomla:before{content:"\f1aa"}.fa-journal-whills:before{content:"\f66a"}.fa-js:before{content:"\f3b8"}.fa-js-square:before{content:"\f3b9"}.fa-jsfiddle:before{content:"\f1cc"}.fa-kaaba:before{content:"\f66b"}.fa-kaggle:before{content:"\f5fa"}.fa-key:before{content:"\f084"}.fa-keybase:before{content:"\f4f5"}.fa-keyboard:before{content:"\f11c"}.fa-keycdn:before{content:"\f3ba"}.fa-khanda:before{content:"\f66d"}.fa-kickstarter:before{content:"\f3bb"}.fa-kickstarter-k:before{content:"\f3bc"}.fa-kiss:before{content
:"\f596"}.fa-kiss-beam:before{content:"\f597"}.fa-kiss-wink-heart:before{content:"\f598"}.fa-kiwi-bird:before{content:"\f535"}.fa-korvue:before{content:"\f42f"}.fa-landmark:before{content:"\f66f"}.fa-language:before{content:"\f1ab"}.fa-laptop:before{content:"\f109"}.fa-laptop-code:before{content:"\f5fc"}.fa-laptop-house:before{content:"\f966"}.fa-laptop-medical:before{content:"\f812"}.fa-laravel:before{content:"\f3bd"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-laugh:before{content:"\f599"}.fa-laugh-beam:before{content:"\f59a"}.fa-laugh-squint:before{content:"\f59b"}.fa-laugh-wink:before{content:"\f59c"}.fa-layer-group:before{content:"\f5fd"}.fa-leaf:before{content:"\f06c"}.fa-leanpub:before{content:"\f212"}.fa-lemon:before{content:"\f094"}.fa-less:before{content:"\f41d"}.fa-less-than:before{content:"\f536"}.fa-less-than-equal:before{content:"\f537"}.fa-level-down-alt:before{content:"\f3be"}.fa-level-up-alt:before{content:"\f3bf"}.fa-life-ring:before{content:"\f1cd"}.fa-lightbulb:before{content:"\f0eb"}.fa-line:before{content:"\f3c0"}.fa-link:before{content:"\f0c1"}.fa-linkedin:before{content:"\f08c"}.fa-linkedin-in:before{content:"\f0e1"}.fa-linode:before{content:"\f2b8"}.fa-linux:before{content:"\f17c"}.fa-lira-sign:before{content:"\f195"}.fa-list:before{content:"\f03a"}.fa-list-alt:before{content:"\f022"}.fa-list-ol:before{content:"\f0cb"}.fa-list-ul:before{content:"\f0ca"}.fa-location-arrow:before{content:"\f124"}.fa-lock:before{content:"\f023"}.fa-lock-open:before{content:"\f3c1"}.fa-long-arrow-alt-down:before{content:"\f309"}.fa-long-arrow-alt-left:before{content:"\f30a"}.fa-long-arrow-alt-right:before{content:"\f30b"}.fa-long-arrow-alt-up:before{content:"\f30c"}.fa-low-vision:before{content:"\f2a8"}.fa-luggage-cart:before{content:"\f59d"}.fa-lungs:before{content:"\f604"}.fa-lungs-virus:before{content:"\f967"}.fa-lyft:before{content:"\f3c3"}.fa-magento:before{content:"\f3c4"}.fa-magic:before{content:"\f0d0"}.fa-magnet:before{
content:"\f076"}.fa-mail-bulk:before{content:"\f674"}.fa-mailchimp:before{content:"\f59e"}.fa-male:before{content:"\f183"}.fa-mandalorian:before{content:"\f50f"}.fa-map:before{content:"\f279"}.fa-map-marked:before{content:"\f59f"}.fa-map-marked-alt:before{content:"\f5a0"}.fa-map-marker:before{content:"\f041"}.fa-map-marker-alt:before{content:"\f3c5"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-markdown:before{content:"\f60f"}.fa-marker:before{content:"\f5a1"}.fa-mars:before{content:"\f222"}.fa-mars-double:before{content:"\f227"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mask:before{content:"\f6fa"}.fa-mastodon:before{content:"\f4f6"}.fa-maxcdn:before{content:"\f136"}.fa-mdb:before{content:"\f8ca"}.fa-medal:before{content:"\f5a2"}.fa-medapps:before{content:"\f3c6"}.fa-medium:before{content:"\f23a"}.fa-medium-m:before{content:"\f3c7"}.fa-medkit:before{content:"\f0fa"}.fa-medrt:before{content:"\f3c8"}.fa-meetup:before{content:"\f2e0"}.fa-megaport:before{content:"\f5a3"}.fa-meh:before{content:"\f11a"}.fa-meh-blank:before{content:"\f5a4"}.fa-meh-rolling-eyes:before{content:"\f5a5"}.fa-memory:before{content:"\f538"}.fa-mendeley:before{content:"\f7b3"}.fa-menorah:before{content:"\f676"}.fa-mercury:before{content:"\f223"}.fa-meteor:before{content:"\f753"}.fa-microblog:before{content:"\f91a"}.fa-microchip:before{content:"\f2db"}.fa-microphone:before{content:"\f130"}.fa-microphone-alt:before{content:"\f3c9"}.fa-microphone-alt-slash:before{content:"\f539"}.fa-microphone-slash:before{content:"\f131"}.fa-microscope:before{content:"\f610"}.fa-microsoft:before{content:"\f3ca"}.fa-minus:before{content:"\f068"}.fa-minus-circle:before{content:"\f056"}.fa-minus-square:before{content:"\f146"}.fa-mitten:before{content:"\f7b5"}.fa-mix:before{content:"\f3cb"}.fa-mixcloud:before{content:"\f289"}.fa-mixer:before{content:"\f956"}.fa-mizuni:before{content:"\f3cc"}.fa-mobile:before{
content:"\f10b"}.fa-mobile-alt:before{content:"\f3cd"}.fa-modx:before{content:"\f285"}.fa-monero:before{content:"\f3d0"}.fa-money-bill:before{content:"\f0d6"}.fa-money-bill-alt:before{content:"\f3d1"}.fa-money-bill-wave:before{content:"\f53a"}.fa-money-bill-wave-alt:before{content:"\f53b"}.fa-money-check:before{content:"\f53c"}.fa-money-check-alt:before{content:"\f53d"}.fa-monument:before{content:"\f5a6"}.fa-moon:before{content:"\f186"}.fa-mortar-pestle:before{content:"\f5a7"}.fa-mosque:before{content:"\f678"}.fa-motorcycle:before{content:"\f21c"}.fa-mountain:before{content:"\f6fc"}.fa-mouse:before{content:"\f8cc"}.fa-mouse-pointer:before{content:"\f245"}.fa-mug-hot:before{content:"\f7b6"}.fa-music:before{content:"\f001"}.fa-napster:before{content:"\f3d2"}.fa-neos:before{content:"\f612"}.fa-network-wired:before{content:"\f6ff"}.fa-neuter:before{content:"\f22c"}.fa-newspaper:before{content:"\f1ea"}.fa-nimblr:before{content:"\f5a8"}.fa-node:before{content:"\f419"}.fa-node-js:before{content:"\f3d3"}.fa-not-equal:before{content:"\f53e"}.fa-notes-medical:before{content:"\f481"}.fa-npm:before{content:"\f3d4"}.fa-ns8:before{content:"\f3d5"}.fa-nutritionix:before{content:"\f3d6"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-oil-can:before{content:"\f613"}.fa-old-republic:before{content:"\f510"}.fa-om:before{content:"\f679"}.fa-opencart:before{content:"\f23d"}.fa-openid:before{content:"\f19b"}.fa-opera:before{content:"\f26a"}.fa-optin-monster:before{content:"\f23c"}.fa-orcid:before{content:"\f8d2"}.fa-osi:before{content:"\f41a"}.fa-otter:before{content:"\f700"}.fa-outdent:before{content:"\f03b"}.fa-page4:before{content:"\f3d7"}.fa-pagelines:before{content:"\f18c"}.fa-pager:before{content:"\f815"}.fa-paint-brush:before{content:"\f1fc"}.fa-paint-roller:before{content:"\f5aa"}.fa-palette:before{content:"\f53f"}.fa-palfed:before{content:"\f3d8"}.fa-pal
let:before{content:"\f482"}.fa-paper-plane:before{content:"\f1d8"}.fa-paperclip:before{content:"\f0c6"}.fa-parachute-box:before{content:"\f4cd"}.fa-paragraph:before{content:"\f1dd"}.fa-parking:before{content:"\f540"}.fa-passport:before{content:"\f5ab"}.fa-pastafarianism:before{content:"\f67b"}.fa-paste:before{content:"\f0ea"}.fa-patreon:before{content:"\f3d9"}.fa-pause:before{content:"\f04c"}.fa-pause-circle:before{content:"\f28b"}.fa-paw:before{content:"\f1b0"}.fa-paypal:before{content:"\f1ed"}.fa-peace:before{content:"\f67c"}.fa-pen:before{content:"\f304"}.fa-pen-alt:before{content:"\f305"}.fa-pen-fancy:before{content:"\f5ac"}.fa-pen-nib:before{content:"\f5ad"}.fa-pen-square:before{content:"\f14b"}.fa-pencil-alt:before{content:"\f303"}.fa-pencil-ruler:before{content:"\f5ae"}.fa-penny-arcade:before{content:"\f704"}.fa-people-arrows:before{content:"\f968"}.fa-people-carry:before{content:"\f4ce"}.fa-pepper-hot:before{content:"\f816"}.fa-percent:before{content:"\f295"}.fa-percentage:before{content:"\f541"}.fa-periscope:before{content:"\f3da"}.fa-person-booth:before{content:"\f756"}.fa-phabricator:before{content:"\f3db"}.fa-phoenix-framework:before{content:"\f3dc"}.fa-phoenix-squadron:before{content:"\f511"}.fa-phone:before{content:"\f095"}.fa-phone-alt:before{content:"\f879"}.fa-phone-slash:before{content:"\f3dd"}.fa-phone-square:before{content:"\f098"}.fa-phone-square-alt:before{content:"\f87b"}.fa-phone-volume:before{content:"\f2a0"}.fa-photo-video:before{content:"\f87c"}.fa-php:before{content:"\f457"}.fa-pied-piper:before{content:"\f2ae"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-pied-piper-hat:before{content:"\f4e5"}.fa-pied-piper-pp:before{content:"\f1a7"}.fa-pied-piper-square:before{content:"\f91e"}.fa-piggy-bank:before{content:"\f4d3"}.fa-pills:before{content:"\f484"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-p:before{content:"\f231"}.fa-pinterest-square:before{content:"\f0d3"}.fa-pizza-slice:before{content:"\f818"}.fa-place-of-worship:before{content
:"\f67f"}.fa-plane:before{content:"\f072"}.fa-plane-arrival:before{content:"\f5af"}.fa-plane-departure:before{content:"\f5b0"}.fa-plane-slash:before{content:"\f969"}.fa-play:before{content:"\f04b"}.fa-play-circle:before{content:"\f144"}.fa-playstation:before{content:"\f3df"}.fa-plug:before{content:"\f1e6"}.fa-plus:before{content:"\f067"}.fa-plus-circle:before{content:"\f055"}.fa-plus-square:before{content:"\f0fe"}.fa-podcast:before{content:"\f2ce"}.fa-poll:before{content:"\f681"}.fa-poll-h:before{content:"\f682"}.fa-poo:before{content:"\f2fe"}.fa-poo-storm:before{content:"\f75a"}.fa-poop:before{content:"\f619"}.fa-portrait:before{content:"\f3e0"}.fa-pound-sign:before{content:"\f154"}.fa-power-off:before{content:"\f011"}.fa-pray:before{content:"\f683"}.fa-praying-hands:before{content:"\f684"}.fa-prescription:before{content:"\f5b1"}.fa-prescription-bottle:before{content:"\f485"}.fa-prescription-bottle-alt:before{content:"\f486"}.fa-print:before{content:"\f02f"}.fa-procedures:before{content:"\f487"}.fa-product-hunt:before{content:"\f288"}.fa-project-diagram:before{content:"\f542"}.fa-pump-medical:before{content:"\f96a"}.fa-pump-soap:before{content:"\f96b"}.fa-pushed:before{content:"\f3e1"}.fa-puzzle-piece:before{content:"\f12e"}.fa-python:before{content:"\f3e2"}.fa-qq:before{content:"\f1d6"}.fa-qrcode:before{content:"\f029"}.fa-question:before{content:"\f128"}.fa-question-circle:before{content:"\f059"}.fa-quidditch:before{content:"\f458"}.fa-quinscape:before{content:"\f459"}.fa-quora:before{content:"\f2c4"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-quran:before{content:"\f687"}.fa-r-project:before{content:"\f4f7"}.fa-radiation:before{content:"\f7b9"}.fa-radiation-alt:before{content:"\f7ba"}.fa-rainbow:before{content:"\f75b"}.fa-random:before{content:"\f074"}.fa-raspberry-pi:before{content:"\f7bb"}.fa-ravelry:before{content:"\f2d9"}.fa-react:before{content:"\f41b"}.fa-reacteurope:before{content:"\f75d"}.fa-readme:before{content:"\f4
d5"}.fa-rebel:before{content:"\f1d0"}.fa-receipt:before{content:"\f543"}.fa-record-vinyl:before{content:"\f8d9"}.fa-recycle:before{content:"\f1b8"}.fa-red-river:before{content:"\f3e3"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-alien:before{content:"\f281"}.fa-reddit-square:before{content:"\f1a2"}.fa-redhat:before{content:"\f7bc"}.fa-redo:before{content:"\f01e"}.fa-redo-alt:before{content:"\f2f9"}.fa-registered:before{content:"\f25d"}.fa-remove-format:before{content:"\f87d"}.fa-renren:before{content:"\f18b"}.fa-reply:before{content:"\f3e5"}.fa-reply-all:before{content:"\f122"}.fa-replyd:before{content:"\f3e6"}.fa-republican:before{content:"\f75e"}.fa-researchgate:before{content:"\f4f8"}.fa-resolving:before{content:"\f3e7"}.fa-restroom:before{content:"\f7bd"}.fa-retweet:before{content:"\f079"}.fa-rev:before{content:"\f5b2"}.fa-ribbon:before{content:"\f4d6"}.fa-ring:before{content:"\f70b"}.fa-road:before{content:"\f018"}.fa-robot:before{content:"\f544"}.fa-rocket:before{content:"\f135"}.fa-rocketchat:before{content:"\f3e8"}.fa-rockrms:before{content:"\f3e9"}.fa-route:before{content:"\f4d7"}.fa-rss:before{content:"\f09e"}.fa-rss-square:before{content:"\f143"}.fa-ruble-sign:before{content:"\f158"}.fa-ruler:before{content:"\f545"}.fa-ruler-combined:before{content:"\f546"}.fa-ruler-horizontal:before{content:"\f547"}.fa-ruler-vertical:before{content:"\f548"}.fa-running:before{content:"\f70c"}.fa-rupee-sign:before{content:"\f156"}.fa-sad-cry:before{content:"\f5b3"}.fa-sad-tear:before{content:"\f5b4"}.fa-safari:before{content:"\f267"}.fa-salesforce:before{content:"\f83b"}.fa-sass:before{content:"\f41e"}.fa-satellite:before{content:"\f7bf"}.fa-satellite-dish:before{content:"\f7c0"}.fa-save:before{content:"\f0c7"}.fa-schlix:before{content:"\f3ea"}.fa-school:before{content:"\f549"}.fa-screwdriver:before{content:"\f54a"}.fa-scribd:before{content:"\f28a"}.fa-scroll:before{content:"\f70e"}.fa-sd-card:before{content:"\f7c2"}.fa-search:before{content:"\f002"}.fa-search-dollar:befor
e{content:"\f688"}.fa-search-location:before{content:"\f689"}.fa-search-minus:before{content:"\f010"}.fa-search-plus:before{content:"\f00e"}.fa-searchengin:before{content:"\f3eb"}.fa-seedling:before{content:"\f4d8"}.fa-sellcast:before{content:"\f2da"}.fa-sellsy:before{content:"\f213"}.fa-server:before{content:"\f233"}.fa-servicestack:before{content:"\f3ec"}.fa-shapes:before{content:"\f61f"}.fa-share:before{content:"\f064"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-share-square:before{content:"\f14d"}.fa-shekel-sign:before{content:"\f20b"}.fa-shield-alt:before{content:"\f3ed"}.fa-shield-virus:before{content:"\f96c"}.fa-ship:before{content:"\f21a"}.fa-shipping-fast:before{content:"\f48b"}.fa-shirtsinbulk:before{content:"\f214"}.fa-shoe-prints:before{content:"\f54b"}.fa-shopify:before{content:"\f957"}.fa-shopping-bag:before{content:"\f290"}.fa-shopping-basket:before{content:"\f291"}.fa-shopping-cart:before{content:"\f07a"}.fa-shopware:before{content:"\f5b5"}.fa-shower:before{content:"\f2cc"}.fa-shuttle-van:before{content:"\f5b6"}.fa-sign:before{content:"\f4d9"}.fa-sign-in-alt:before{content:"\f2f6"}.fa-sign-language:before{content:"\f2a7"}.fa-sign-out-alt:before{content:"\f2f5"}.fa-signal:before{content:"\f012"}.fa-signature:before{content:"\f5b7"}.fa-sim-card:before{content:"\f7c4"}.fa-simplybuilt:before{content:"\f215"}.fa-sistrix:before{content:"\f3ee"}.fa-sitemap:before{content:"\f0e8"}.fa-sith:before{content:"\f512"}.fa-skating:before{content:"\f7c5"}.fa-sketch:before{content:"\f7c6"}.fa-skiing:before{content:"\f7c9"}.fa-skiing-nordic:before{content:"\f7ca"}.fa-skull:before{content:"\f54c"}.fa-skull-crossbones:before{content:"\f714"}.fa-skyatlas:before{content:"\f216"}.fa-skype:before{content:"\f17e"}.fa-slack:before{content:"\f198"}.fa-slack-hash:before{content:"\f3ef"}.fa-slash:before{content:"\f715"}.fa-sleigh:before{content:"\f7cc"}.fa-sliders-h:before{content:"\f1de"}.fa-slideshare:before{content:"\f1e7"}.fa-smile:bef
ore{content:"\f118"}.fa-smile-beam:before{content:"\f5b8"}.fa-smile-wink:before{content:"\f4da"}.fa-smog:before{content:"\f75f"}.fa-smoking:before{content:"\f48d"}.fa-smoking-ban:before{content:"\f54d"}.fa-sms:before{content:"\f7cd"}.fa-snapchat:before{content:"\f2ab"}.fa-snapchat-ghost:before{content:"\f2ac"}.fa-snapchat-square:before{content:"\f2ad"}.fa-snowboarding:before{content:"\f7ce"}.fa-snowflake:before{content:"\f2dc"}.fa-snowman:before{content:"\f7d0"}.fa-snowplow:before{content:"\f7d2"}.fa-soap:before{content:"\f96e"}.fa-socks:before{content:"\f696"}.fa-solar-panel:before{content:"\f5ba"}.fa-sort:before{content:"\f0dc"}.fa-sort-alpha-down:before{content:"\f15d"}.fa-sort-alpha-down-alt:before{content:"\f881"}.fa-sort-alpha-up:before{content:"\f15e"}.fa-sort-alpha-up-alt:before{content:"\f882"}.fa-sort-amount-down:before{content:"\f160"}.fa-sort-amount-down-alt:before{content:"\f884"}.fa-sort-amount-up:before{content:"\f161"}.fa-sort-amount-up-alt:before{content:"\f885"}.fa-sort-down:before{content:"\f0dd"}.fa-sort-numeric-down:before{content:"\f162"}.fa-sort-numeric-down-alt:before{content:"\f886"}.fa-sort-numeric-up:before{content:"\f163"}.fa-sort-numeric-up-alt:before{content:"\f887"}.fa-sort-up:before{content:"\f0de"}.fa-soundcloud:before{content:"\f1be"}.fa-sourcetree:before{content:"\f7d3"}.fa-spa:before{content:"\f5bb"}.fa-space-shuttle:before{content:"\f197"}.fa-speakap:before{content:"\f3f3"}.fa-speaker-deck:before{content:"\f83c"}.fa-spell-check:before{content:"\f891"}.fa-spider:before{content:"\f717"}.fa-spinner:before{content:"\f110"}.fa-splotch:before{content:"\f5bc"}.fa-spotify:before{content:"\f1bc"}.fa-spray-can:before{content:"\f5bd"}.fa-square:before{content:"\f0c8"}.fa-square-full:before{content:"\f45c"}.fa-square-root-alt:before{content:"\f698"}.fa-squarespace:before{content:"\f5be"}.fa-stack-exchange:before{content:"\f18d"}.fa-stack-overflow:before{content:"\f16c"}.fa-stackpath:before{content:"\f842"}.fa-stamp:before{content:"\f5bf"}.fa
-star:before{content:"\f005"}.fa-star-and-crescent:before{content:"\f699"}.fa-star-half:before{content:"\f089"}.fa-star-half-alt:before{content:"\f5c0"}.fa-star-of-david:before{content:"\f69a"}.fa-star-of-life:before{content:"\f621"}.fa-staylinked:before{content:"\f3f5"}.fa-steam:before{content:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-steam-symbol:before{content:"\f3f6"}.fa-step-backward:before{content:"\f048"}.fa-step-forward:before{content:"\f051"}.fa-stethoscope:before{content:"\f0f1"}.fa-sticker-mule:before{content:"\f3f7"}.fa-sticky-note:before{content:"\f249"}.fa-stop:before{content:"\f04d"}.fa-stop-circle:before{content:"\f28d"}.fa-stopwatch:before{content:"\f2f2"}.fa-stopwatch-20:before{content:"\f96f"}.fa-store:before{content:"\f54e"}.fa-store-alt:before{content:"\f54f"}.fa-store-alt-slash:before{content:"\f970"}.fa-store-slash:before{content:"\f971"}.fa-strava:before{content:"\f428"}.fa-stream:before{content:"\f550"}.fa-street-view:before{content:"\f21d"}.fa-strikethrough:before{content:"\f0cc"}.fa-stripe:before{content:"\f429"}.fa-stripe-s:before{content:"\f42a"}.fa-stroopwafel:before{content:"\f551"}.fa-studiovinari:before{content:"\f3f8"}.fa-stumbleupon:before{content:"\f1a4"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-subscript:before{content:"\f12c"}.fa-subway:before{content:"\f239"}.fa-suitcase:before{content:"\f0f2"}.fa-suitcase-rolling:before{content:"\f5c1"}.fa-sun:before{content:"\f185"}.fa-superpowers:before{content:"\f2dd"}.fa-superscript:before{content:"\f12b"}.fa-supple:before{content:"\f3f9"}.fa-surprise:before{content:"\f5c2"}.fa-suse:before{content:"\f7d6"}.fa-swatchbook:before{content:"\f5c3"}.fa-swift:before{content:"\f8e1"}.fa-swimmer:before{content:"\f5c4"}.fa-swimming-pool:before{content:"\f5c5"}.fa-symfony:before{content:"\f83d"}.fa-synagogue:before{content:"\f69b"}.fa-sync:before{content:"\f021"}.fa-sync-alt:before{content:"\f2f1"}.fa-syringe:before{content:"\f48e"}.fa-table:before{content:"\f0ce"}.fa-table-tennis:b
efore{content:"\f45d"}.fa-tablet:before{content:"\f10a"}.fa-tablet-alt:before{content:"\f3fa"}.fa-tablets:before{content:"\f490"}.fa-tachometer-alt:before{content:"\f3fd"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-tape:before{content:"\f4db"}.fa-tasks:before{content:"\f0ae"}.fa-taxi:before{content:"\f1ba"}.fa-teamspeak:before{content:"\f4f9"}.fa-teeth:before{content:"\f62e"}.fa-teeth-open:before{content:"\f62f"}.fa-telegram:before{content:"\f2c6"}.fa-telegram-plane:before{content:"\f3fe"}.fa-temperature-high:before{content:"\f769"}.fa-temperature-low:before{content:"\f76b"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-tenge:before{content:"\f7d7"}.fa-terminal:before{content:"\f120"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-th:before{content:"\f00a"}.fa-th-large:before{content:"\f009"}.fa-th-list:before{content:"\f00b"}.fa-the-red-yeti:before{content:"\f69d"}.fa-theater-masks:before{content:"\f630"}.fa-themeco:before{content:"\f5c6"}.fa-themeisle:before{content:"\f2b2"}.fa-thermometer:before{content:"\f491"}.fa-thermometer-empty:before{content:"\f2cb"}.fa-thermometer-full:before{content:"\f2c7"}.fa-thermometer-half:before{content:"\f2c9"}.fa-thermometer-quarter:before{content:"\f2ca"}.fa-thermometer-three-quarters:before{content:"\f2c8"}.fa-think-peaks:before{content:"\f731"}.fa-thumbs-down:before{content:"\f165"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbtack:before{content:"\f08d"}.fa-ticket-alt:before{content:"\f3ff"}.fa-times:before{content:"\f00d"}.fa-times-circle:before{content:"\f057"}.fa-tint:before{content:"\f043"}.fa-tint-slash:before{content:"\f5c7"}.fa-tired:before{content:"\f5c8"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-toilet:before{content:"\f7d8"}.fa-toilet-paper:before{content:"\f71e"}.fa-toilet-paper-slash:before{content:"\f972"}.fa-toolbox:before{content:"\f552"}.fa-tools:before{content:"\f7d9"}.fa-tooth:before{content:"\f5c9"}.fa-torah:before{con
tent:"\f6a0"}.fa-torii-gate:before{content:"\f6a1"}.fa-tractor:before{content:"\f722"}.fa-trade-federation:before{content:"\f513"}.fa-trademark:before{content:"\f25c"}.fa-traffic-light:before{content:"\f637"}.fa-trailer:before{content:"\f941"}.fa-train:before{content:"\f238"}.fa-tram:before{content:"\f7da"}.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-trash:before{content:"\f1f8"}.fa-trash-alt:before{content:"\f2ed"}.fa-trash-restore:before{content:"\f829"}.fa-trash-restore-alt:before{content:"\f82a"}.fa-tree:before{content:"\f1bb"}.fa-trello:before{content:"\f181"}.fa-tripadvisor:before{content:"\f262"}.fa-trophy:before{content:"\f091"}.fa-truck:before{content:"\f0d1"}.fa-truck-loading:before{content:"\f4de"}.fa-truck-monster:before{content:"\f63b"}.fa-truck-moving:before{content:"\f4df"}.fa-truck-pickup:before{content:"\f63c"}.fa-tshirt:before{content:"\f553"}.fa-tty:before{content:"\f1e4"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-tv:before{content:"\f26c"}.fa-twitch:before{content:"\f1e8"}.fa-twitter:before{content:"\f099"}.fa-twitter-square:before{content:"\f081"}.fa-typo3:before{content:"\f42b"}.fa-uber:before{content:"\f402"}.fa-ubuntu:before{content:"\f7df"}.fa-uikit:before{content:"\f403"}.fa-umbraco:before{content:"\f8e8"}.fa-umbrella:before{content:"\f0e9"}.fa-umbrella-beach:before{content:"\f5ca"}.fa-underline:before{content:"\f0cd"}.fa-undo:before{content:"\f0e2"}.fa-undo-alt:before{content:"\f2ea"}.fa-uniregistry:before{content:"\f404"}.fa-unity:before{content:"\f949"}.fa-universal-access:before{content:"\f29a"}.fa-university:before{content:"\f19c"}.fa-unlink:before{content:"\f127"}.fa-unlock:before{content:"\f09c"}.fa-unlock-alt:before{content:"\f13e"}.fa-untappd:before{content:"\f405"}.fa-upload:before{content:"\f093"}.fa-ups:before{content:"\f7e0"}.fa-usb:before{content:"\f287"}.fa-user:before{content:"\f007"}.fa-user-alt:before{content:"\f406"}.fa-user-alt-slash:before{cont
ent:"\f4fa"}.fa-user-astronaut:before{content:"\f4fb"}.fa-user-check:before{content:"\f4fc"}.fa-user-circle:before{content:"\f2bd"}.fa-user-clock:before{content:"\f4fd"}.fa-user-cog:before{content:"\f4fe"}.fa-user-edit:before{content:"\f4ff"}.fa-user-friends:before{content:"\f500"}.fa-user-graduate:before{content:"\f501"}.fa-user-injured:before{content:"\f728"}.fa-user-lock:before{content:"\f502"}.fa-user-md:before{content:"\f0f0"}.fa-user-minus:before{content:"\f503"}.fa-user-ninja:before{content:"\f504"}.fa-user-nurse:before{content:"\f82f"}.fa-user-plus:before{content:"\f234"}.fa-user-secret:before{content:"\f21b"}.fa-user-shield:before{content:"\f505"}.fa-user-slash:before{content:"\f506"}.fa-user-tag:before{content:"\f507"}.fa-user-tie:before{content:"\f508"}.fa-user-times:before{content:"\f235"}.fa-users:before{content:"\f0c0"}.fa-users-cog:before{content:"\f509"}.fa-usps:before{content:"\f7e1"}.fa-ussunnah:before{content:"\f407"}.fa-utensil-spoon:before{content:"\f2e5"}.fa-utensils:before{content:"\f2e7"}.fa-vaadin:before{content:"\f408"}.fa-vector-square:before{content:"\f5cb"}.fa-venus:before{content:"\f221"}.fa-venus-double:before{content:"\f226"}.fa-venus-mars:before{content:"\f228"}.fa-viacoin:before{content:"\f237"}.fa-viadeo:before{content:"\f2a9"}.fa-viadeo-square:before{content:"\f2aa"}.fa-vial:before{content:"\f492"}.fa-vials:before{content:"\f493"}.fa-viber:before{content:"\f409"}.fa-video:before{content:"\f03d"}.fa-video-slash:before{content:"\f4e2"}.fa-vihara:before{content:"\f6a7"}.fa-vimeo:before{content:"\f40a"}.fa-vimeo-square:before{content:"\f194"}.fa-vimeo-v:before{content:"\f27d"}.fa-vine:before{content:"\f1ca"}.fa-virus:before{content:"\f974"}.fa-virus-slash:before{content:"\f975"}.fa-viruses:before{content:"\f976"}.fa-vk:before{content:"\f189"}.fa-vnv:before{content:"\f40b"}.fa-voicemail:before{content:"\f897"}.fa-volleyball-ball:before{content:"\f45f"}.fa-volume-down:before{content:"\f027"}.fa-volume-mute:before{content:"\f6a9"}.fa-vol
ume-off:before{content:"\f026"}.fa-volume-up:before{content:"\f028"}.fa-vote-yea:before{content:"\f772"}.fa-vr-cardboard:before{content:"\f729"}.fa-vuejs:before{content:"\f41f"}.fa-walking:before{content:"\f554"}.fa-wallet:before{content:"\f555"}.fa-warehouse:before{content:"\f494"}.fa-water:before{content:"\f773"}.fa-wave-square:before{content:"\f83e"}.fa-waze:before{content:"\f83f"}.fa-weebly:before{content:"\f5cc"}.fa-weibo:before{content:"\f18a"}.fa-weight:before{content:"\f496"}.fa-weight-hanging:before{content:"\f5cd"}.fa-weixin:before{content:"\f1d7"}.fa-whatsapp:before{content:"\f232"}.fa-whatsapp-square:before{content:"\f40c"}.fa-wheelchair:before{content:"\f193"}.fa-whmcs:before{content:"\f40d"}.fa-wifi:before{content:"\f1eb"}.fa-wikipedia-w:before{content:"\f266"}.fa-wind:before{content:"\f72e"}.fa-window-close:before{content:"\f410"}.fa-window-maximize:before{content:"\f2d0"}.fa-window-minimize:before{content:"\f2d1"}.fa-window-restore:before{content:"\f2d2"}.fa-windows:before{content:"\f17a"}.fa-wine-bottle:before{content:"\f72f"}.fa-wine-glass:before{content:"\f4e3"}.fa-wine-glass-alt:before{content:"\f5ce"}.fa-wix:before{content:"\f5cf"}.fa-wizards-of-the-coast:before{content:"\f730"}.fa-wolf-pack-battalion:before{content:"\f514"}.fa-won-sign:before{content:"\f159"}.fa-wordpress:before{content:"\f19a"}.fa-wordpress-simple:before{content:"\f411"}.fa-wpbeginner:before{content:"\f297"}.fa-wpexplorer:before{content:"\f2de"}.fa-wpforms:before{content:"\f298"}.fa-wpressr:before{content:"\f3e4"}.fa-wrench:before{content:"\f0ad"}.fa-x-ray:before{content:"\f497"}.fa-xbox:before{content:"\f412"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-y-combinator:before{content:"\f23b"}.fa-yahoo:before{content:"\f19e"}.fa-yammer:before{content:"\f840"}.fa-yandex:before{content:"\f413"}.fa-yandex-international:before{content:"\f414"}.fa-yarn:before{content:"\f7e3"}.fa-yelp:before{content:"\f1e9"}.fa-yen-sign:before{content:"\f157"}.fa-yin-yang:
before{content:"\f6ad"}.fa-yoast:before{content:"\f2b1"}.fa-youtube:before{content:"\f167"}.fa-youtube-square:before{content:"\f431"}.fa-zhihu:before{content:"\f63f"}.sr-only{border:0;clip:rect(0,0,0,0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.sr-only-focusable:active,.sr-only-focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}@font-face{font-family:"Font Awesome 5 Brands";font-style:normal;font-weight:400;font-display:block;src:url(../webfonts/fa-brands-400.eot);src:url(../webfonts/fa-brands-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-brands-400.woff2) format("woff2"),url(../webfonts/fa-brands-400.woff) format("woff"),url(../webfonts/fa-brands-400.ttf) format("truetype"),url(../webfonts/fa-brands-400.svg#fontawesome) format("svg")}.fab{font-family:"Font Awesome 5 Brands"}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:400;font-display:block;src:url(../webfonts/fa-regular-400.eot);src:url(../webfonts/fa-regular-400.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-regular-400.woff2) format("woff2"),url(../webfonts/fa-regular-400.woff) format("woff"),url(../webfonts/fa-regular-400.ttf) format("truetype"),url(../webfonts/fa-regular-400.svg#fontawesome) format("svg")}.fab,.far{font-weight:400}@font-face{font-family:"Font Awesome 5 Free";font-style:normal;font-weight:900;font-display:block;src:url(../webfonts/fa-solid-900.eot);src:url(../webfonts/fa-solid-900.eot?#iefix) format("embedded-opentype"),url(../webfonts/fa-solid-900.woff2) format("woff2"),url(../webfonts/fa-solid-900.woff) format("woff"),url(../webfonts/fa-solid-900.ttf) format("truetype"),url(../webfonts/fa-solid-900.svg#fontawesome) format("svg")}.fa,.far,.fas{font-family:"Font Awesome 5 Free"}.fa,.fas{font-weight:900} \ No newline at end of file diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot 
b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot new file mode 100644 index 000000000..a1bc094ab Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg new file mode 100644 index 000000000..46ad237a6 --- /dev/null +++ b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg @@ -0,0 +1,3570 @@ + + + + + +Created by FontForge 20190801 at Mon Mar 23 10:45:51 2020 + By Robert Madole +Copyright (c) Font Awesome + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf new file mode 100644 index 000000000..948a2a6cc Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff new file mode 100644 index 000000000..2a89d521e Binary files /dev/null 
and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2 new file mode 100644 index 000000000..141a90a9e Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2 differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot new file mode 100644 index 000000000..38cf2517a Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg new file mode 100644 index 000000000..48634a9ab --- /dev/null +++ b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg @@ -0,0 +1,803 @@ + + + + + +Created by FontForge 20190801 at Mon Mar 23 10:45:51 2020 + By Robert Madole +Copyright (c) Font Awesome + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf new file mode 100644 index 000000000..abe99e20c Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff new file mode 100644 index 000000000..24de566a5 Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff differ diff --git 
a/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2 new file mode 100644 index 000000000..7e0118e52 Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2 differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot new file mode 100644 index 000000000..d3b77c223 Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg new file mode 100644 index 000000000..7742838b4 --- /dev/null +++ b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg @@ -0,0 +1,4938 @@ + + + + + +Created by FontForge 20190801 at Mon Mar 23 10:45:51 2020 + By Robert Madole +Copyright (c) Font Awesome + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf new file mode 100644 index 000000000..5b979039a Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff new file mode 100644 index 000000000..beec79178 Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff differ diff --git a/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2 b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2 new file mode 100644 index 000000000..978a681a1 Binary files /dev/null and b/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2 differ diff --git a/_static/webpack-macros.html b/_static/webpack-macros.html new file mode 100644 index 000000000..1fe5b4734 --- /dev/null +++ b/_static/webpack-macros.html @@ -0,0 +1,28 @@ + + {% 
macro head_pre_icons() %} + + + + {% endmacro %} + + {% macro head_pre_fonts() %} + {% endmacro %} + + {% macro head_pre_bootstrap() %} + + + {% endmacro %} + + {% macro head_js_preload() %} + + {% endmacro %} + + {% macro body_post() %} + + {% endmacro %} \ No newline at end of file diff --git a/autoapi/datafusion/catalog/index.html b/autoapi/datafusion/catalog/index.html new file mode 100644 index 000000000..2defa8e95 --- /dev/null +++ b/autoapi/datafusion/catalog/index.html @@ -0,0 +1,1353 @@ + + + + + + + + datafusion.catalog — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.catalog

+

Data catalog providers.

+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + +

Catalog

DataFusion data catalog.

CatalogList

DataFusion data catalog list.

CatalogProvider

Abstract class for defining a Python based Catalog Provider.

CatalogProviderList

Abstract class for defining a Python based Catalog Provider List.

Schema

DataFusion Schema.

SchemaProvider

Abstract class for defining a Python based Schema Provider.

Table

A DataFusion table.

+
+
+

Module Contents

+
+
+class datafusion.catalog.Catalog(catalog: datafusion._internal.catalog.RawCatalog)
+

DataFusion data catalog.

+

This constructor is not typically called by the end user.

+
+
+__repr__() str
+

Print a string representation of the catalog.

+
+ +
+
+database(name: str = 'public') Schema
+

Returns the database with the given name from this catalog.

+
+ +
+
+deregister_schema(name: str, cascade: bool = True) Schema | None
+

Deregister a schema from this catalog.

+
+ +
+
+static memory_catalog(ctx: datafusion.SessionContext | None = None) Catalog
+

Create an in-memory catalog provider.

+
+ +
+
+names() set[str]
+

This is an alias for schema_names.

+
+ +
+
+register_schema(name: str, schema: Schema | SchemaProvider | SchemaProviderExportable) Schema | None
+

Register a schema with this catalog.

+
+ +
+
+schema(name: str = 'public') Schema
+

Returns the schema with the given name from this catalog.

+
+ +
+
+schema_names() set[str]
+

Returns the list of schemas in this catalog.

+
+ +
+
+catalog
+
+ +
+ +
+
+class datafusion.catalog.CatalogList(catalog_list: datafusion._internal.catalog.RawCatalogList)
+

DataFusion data catalog list.

+

This constructor is not typically called by the end user.

+
+
+__repr__() str
+

Print a string representation of the catalog list.

+
+ +
+
+catalog(name: str = 'datafusion') Catalog
+

Returns the catalog with the given name from this catalog list.

+
+ +
+
+catalog_names() set[str]
+

Returns the set of catalog names in this catalog list.

+
+ +
+
+static memory_catalog(ctx: datafusion.SessionContext | None = None) CatalogList
+

Create an in-memory catalog provider list.

+
+ +
+
+names() set[str]
+

This is an alias for catalog_names.

+
+ +
+
+register_catalog(name: str, catalog: Catalog | CatalogProvider | CatalogProviderExportable) Catalog | None
+

Register a catalog with this catalog list.

+
+ +
+
+catalog_list
+
+ +
+ +
+
+class datafusion.catalog.CatalogProvider
+

Bases: abc.ABC

+

Abstract class for defining a Python based Catalog Provider.

+
+
+deregister_schema(name: str, cascade: bool) None
+

Remove a schema from this catalog.

+

This method is optional. If your catalog provides a fixed list of schemas, you +do not need to implement this method.

+
+
Parameters:
+
    +
  • name – The name of the schema to remove.

  • +
  • cascade – If true, deregister the tables within the schema.

  • +
+
+
+
+ +
+
+register_schema(name: str, schema: SchemaProviderExportable | SchemaProvider | Schema) None
+

Add a schema to this catalog.

+

This method is optional. If your catalog provides a fixed list of schemas, you +do not need to implement this method.

+
+ +
+
+abstract schema(name: str) Schema | None
+

Retrieve a specific schema from this catalog.

+
+ +
+
+abstract schema_names() set[str]
+

Set of the names of all schemas in this catalog.

+
+ +
+ +
+
+class datafusion.catalog.CatalogProviderList
+

Bases: abc.ABC

+

Abstract class for defining a Python based Catalog Provider List.

+
+
+abstract catalog(name: str) CatalogProviderExportable | CatalogProvider | Catalog | None
+

Retrieve a specific catalog from this catalog list.

+
+ +
+
+abstract catalog_names() set[str]
+

Set of the names of all catalogs in this catalog list.

+
+ +
+
+register_catalog(name: str, catalog: CatalogProviderExportable | CatalogProvider | Catalog) None
+

Add a catalog to this catalog list.

+

This method is optional. If your catalog provides a fixed list of catalogs, you +do not need to implement this method.

+
+ +
+ +
+
+class datafusion.catalog.Schema(schema: datafusion._internal.catalog.RawSchema)
+

DataFusion Schema.

+

This constructor is not typically called by the end user.

+
+
+__repr__() str
+

Print a string representation of the schema.

+
+ +
+
+deregister_table(name: str) None
+

Deregister a table provider from this schema.

+
+ +
+
+static memory_schema(ctx: datafusion.SessionContext | None = None) Schema
+

Create an in-memory schema provider.

+
+ +
+
+names() set[str]
+

This is an alias for table_names.

+
+ +
+
+register_table(name: str, table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset) None
+

Register a table in this schema.

+
+ +
+
+table(name: str) Table
+

Return the table with the given name from this schema.

+
+ +
+
+table_exist(name: str) bool
+

Determines if a table exists in this schema.

+
+ +
+
+table_names() set[str]
+

Returns the list of all tables in this schema.

+
+ +
+
+_raw_schema
+
+ +
+ +
+
+class datafusion.catalog.SchemaProvider
+

Bases: abc.ABC

+

Abstract class for defining a Python based Schema Provider.

+
+
+deregister_table(name: str, cascade: bool) None
+

Remove a table from this schema.

+

This method is optional. If your schema provides a fixed list of tables, you do +not need to implement this method.

+
+ +
+
+owner_name() str | None
+

Returns the owner of the schema.

+

This is an optional method. The default return is None.

+
+ +
+
+register_table(name: str, table: Table | datafusion.context.TableProviderExportable | Any) None
+

Add a table to this schema.

+

This method is optional. If your schema provides a fixed list of tables, you do +not need to implement this method.

+
+ +
+
+abstract table(name: str) Table | None
+

Retrieve a specific table from this schema.

+
+ +
+
+abstract table_exist(name: str) bool
+

Returns true if the table exists in this schema.

+
+ +
+
+abstract table_names() set[str]
+

Set of the names of all tables in this schema.

+
+ +
+ +
+
+class datafusion.catalog.Table(table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset, ctx: datafusion.SessionContext | None = None)
+

A DataFusion table.

+

Internally we currently support the following types of tables:

+
    +
  • Tables created using built-in DataFusion methods, such as +reading from CSV or Parquet

  • +
  • pyarrow datasets

  • +
  • DataFusion DataFrames, which will be converted into a view

  • +
  • Externally provided tables implemented with the FFI PyCapsule +interface (advanced)

  • +
+

Constructor.

+
+
+__repr__() str
+

Print a string representation of the table.

+
+ +
+
+static from_dataset(dataset: pyarrow.dataset.Dataset) Table
+

Turn a pyarrow.dataset Dataset into a Table.

+
+ +
+
+__slots__ = ('_inner',)
+
+ +
+
+_inner
+
+ +
+
+property kind: str
+

Returns the kind of table.

+
+ +
+
+property schema: pyarrow.Schema
+

Returns the schema associated with this table.

+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/context/index.html b/autoapi/datafusion/context/index.html new file mode 100644 index 000000000..fba3608b3 --- /dev/null +++ b/autoapi/datafusion/context/index.html @@ -0,0 +1,2462 @@ + + + + + + + + datafusion.context — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.context

+

Session Context and its associated configuration.

+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

ArrowArrayExportable

Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.

ArrowStreamExportable

Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.

RuntimeConfig

See RuntimeEnvBuilder.

RuntimeEnvBuilder

Runtime configuration options.

SQLOptions

Options to be used when performing SQL queries.

SessionConfig

Session configuration options.

SessionContext

This is the main interface for executing queries and creating DataFrames.

TableProviderExportable

Type hint for object that has __datafusion_table_provider__ PyCapsule.

+
+
+

Module Contents

+
+
+class datafusion.context.ArrowArrayExportable
+

Bases: Protocol

+

Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.

+

https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

+
+
+__arrow_c_array__(requested_schema: object | None = None) tuple[object, object]
+
+ +
+ +
+
+class datafusion.context.ArrowStreamExportable
+

Bases: Protocol

+

Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.

+

https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

+
+
+__arrow_c_stream__(requested_schema: object | None = None) object
+
+ +
+ +
+
+class datafusion.context.RuntimeConfig
+

Bases: RuntimeEnvBuilder

+

See RuntimeEnvBuilder.

+

Create a new RuntimeEnvBuilder with default values.

+
+ +
+
+class datafusion.context.RuntimeEnvBuilder
+

Runtime configuration options.

+

Create a new RuntimeEnvBuilder with default values.

+
+
+with_disk_manager_disabled() RuntimeEnvBuilder
+

Disable the disk manager; attempts to create temporary files will error.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_disk_manager_os() RuntimeEnvBuilder
+

Use the operating system’s temporary directory for disk manager.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_disk_manager_specified(*paths: str | pathlib.Path) RuntimeEnvBuilder
+

Use the specified paths for the disk manager’s temporary files.

+
+
Parameters:
+

paths – Paths to use for the disk manager’s temporary files.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_fair_spill_pool(size: int) RuntimeEnvBuilder
+

Use a fair spill pool with the specified size.

+

This pool works best when you know beforehand the query has multiple spillable +operators that will likely all need to spill. Sometimes it will cause spills +even when there was sufficient memory (reserved for other operators) to avoid +doing so:

+
┌───────────────────────z──────────────────────z───────────────┐
+│                       z                      z               │
+│                       z                      z               │
+│       Spillable       z       Unspillable    z     Free      │
+│        Memory         z        Memory        z    Memory     │
+│                       z                      z               │
+│                       z                      z               │
+└───────────────────────z──────────────────────z───────────────┘
+
+
+
+
Parameters:
+

size – Size of the memory pool in bytes.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_fair_spill_pool(1024)
+
+
+
+ +
+
+with_greedy_memory_pool(size: int) RuntimeEnvBuilder
+

Use a greedy memory pool with the specified size.

+

This pool works well for queries that do not need to spill or have a single +spillable operator. See with_fair_spill_pool() if there are +multiple spillable operators that all will spill.

+
+
Parameters:
+

size – Size of the memory pool in bytes.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_greedy_memory_pool(1024)
+
+
+
+ +
+
+with_temp_file_path(path: str | pathlib.Path) RuntimeEnvBuilder
+

Use the specified path to create any needed temporary files.

+
+
Parameters:
+

path – Path to use for temporary files.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_temp_file_path("/tmp")
+
+
+
+ +
+
+with_unbounded_memory_pool() RuntimeEnvBuilder
+

Use an unbounded memory pool.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+config_internal
+
+ +
+ +
+
+class datafusion.context.SQLOptions
+

Options to be used when performing SQL queries.

+

Create a new SQLOptions with default values.

+

The default values are: +- DDL commands are allowed +- DML commands are allowed +- Statements are allowed

+
+
+with_allow_ddl(allow: bool = True) SQLOptions
+

Should DDL (Data Definition Language) commands be run?

+

Examples of DDL commands include CREATE TABLE and DROP TABLE.

+
+
Parameters:
+

allow – Allow DDL commands to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
+

Example usage:

+
options = SQLOptions().with_allow_ddl(True)
+
+
+
+ +
+
+with_allow_dml(allow: bool = True) SQLOptions
+

Should DML (Data Manipulation Language) commands be run?

+

Examples of DML commands include INSERT INTO and DELETE.

+
+
Parameters:
+

allow – Allow DML commands to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
+

Example usage:

+
options = SQLOptions().with_allow_dml(True)
+
+
+
+ +
+
+with_allow_statements(allow: bool = True) SQLOptions
+

Should statements such as SET VARIABLE and BEGIN TRANSACTION be run?

+
+
Parameters:
+

allow – Allow statements to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
Return type:
+

SQLOptions

+
+
+

Example usage:

+
options = SQLOptions().with_allow_statements(True)
+
+
+
+ +
+
+options_internal
+
+ +
+ +
+
+class datafusion.context.SessionConfig(config_options: dict[str, str] | None = None)
+

Session configuration options.

+

Create a new SessionConfig with the given configuration options.

+
+
Parameters:
+

config_options – Configuration options.

+
+
+
+
+set(key: str, value: str) SessionConfig
+

Set a configuration option.

+

Args: +key: Option key. +value: Option value.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_batch_size(batch_size: int) SessionConfig
+

Customize batch size.

+
+
Parameters:
+

batch_size – Batch size.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_create_default_catalog_and_schema(enabled: bool = True) SessionConfig
+

Control if the default catalog and schema will be automatically created.

+
+
Parameters:
+

enabled – Whether the default catalog and schema will be +automatically created.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_default_catalog_and_schema(catalog: str, schema: str) SessionConfig
+

Select a name for the default catalog and schema.

+
+
Parameters:
+
    +
  • catalog – Catalog name.

  • +
  • schema – Schema name.

  • +
+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_information_schema(enabled: bool = True) SessionConfig
+

Enable or disable the inclusion of information_schema virtual tables.

+
+
Parameters:
+

enabled – Whether to include information_schema virtual tables.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_parquet_pruning(enabled: bool = True) SessionConfig
+

Enable or disable the use of pruning predicate for parquet readers.

+

Pruning predicates will enable the reader to skip row groups.

+
+
Parameters:
+

enabled – Whether to use pruning predicate for parquet readers.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_aggregations(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for aggregations.

+

Enabling this improves parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for aggregations.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_file_min_size(size: int) SessionConfig
+

Set minimum file range size for repartitioning scans.

+
+
Parameters:
+

size – Minimum file range size.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_file_scans(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for file scans.

+
+
Parameters:
+

enabled – Whether to use repartitioning for file scans.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_joins(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for joins to improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for joins.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_sorts(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for sorts.

+

This may improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for sorts.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_windows(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for window functions.

+

This may improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for window functions.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_target_partitions(target_partitions: int) SessionConfig
+

Customize the number of target partitions for query execution.

+

Increasing partitions can increase concurrency.

+
+
Parameters:
+

target_partitions – Number of target partitions.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+config_internal
+
+ +
+ +
+
+class datafusion.context.SessionContext(config: SessionConfig | None = None, runtime: RuntimeEnvBuilder | None = None)
+

This is the main interface for executing queries and creating DataFrames.

+

See Concepts in the online documentation for more information.

+

Main interface for executing queries with DataFusion.

+

Maintains the state of the connection between a user and an instance +of the DataFusion engine.

+
+
Parameters:
+
    +
  • config – Session configuration options.

  • +
  • runtime – Runtime configuration options.

  • +
+
+
+

Example usage:

+

The following example demonstrates how to use the context to execute +a query against a CSV data source using the DataFrame API:

+
from datafusion import SessionContext
+
+ctx = SessionContext()
+df = ctx.read_csv("data.csv")
+
+
+
+
+__datafusion_logical_extension_codec__() Any
+

Access the PyCapsule FFI_LogicalExtensionCodec.

+
+ +
+
+__datafusion_task_context_provider__() Any
+

Access the PyCapsule FFI_TaskContextProvider.

+
+ +
+
+__repr__() str
+

Print a string representation of the Session Context.

+
+ +
+
+static _convert_file_sort_order(file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None) list[list[datafusion._internal.expr.SortExpr]] | None
+

Convert nested SortKey sequences into raw sort expressions.

+

Each SortKey can be a column name string, an Expr, or a +SortExpr and will be converted using +datafusion.expr.sort_list_to_raw_sort_list().

+
+ +
+
+static _convert_table_partition_cols(table_partition_cols: list[tuple[str, str | pyarrow.DataType]]) list[tuple[str, pyarrow.DataType]]
+
+ +
+
+catalog(name: str = 'datafusion') datafusion.catalog.Catalog
+

Retrieve a catalog by name.

+
+ +
+
+catalog_names() set[str]
+

Returns the list of catalogs in this context.

+
+ +
+
+create_dataframe(partitions: list[list[pyarrow.RecordBatch]], name: str | None = None, schema: pyarrow.Schema | None = None) datafusion.dataframe.DataFrame
+

Create and return a dataframe using the provided partitions.

+
+
Parameters:
+
    +
partitions – pa.RecordBatch partitions to register.

  • +
  • name – Resultant dataframe name.

  • +
  • schema – Schema for the partitions.

  • +
+
+
Returns:
+

DataFrame representation of the provided partitions.

+
+
+
+ +
+
+create_dataframe_from_logical_plan(plan: datafusion.plan.LogicalPlan) datafusion.dataframe.DataFrame
+

Create a DataFrame from an existing plan.

+
+
Parameters:
+

plan – Logical plan.

+
+
Returns:
+

DataFrame representation of the logical plan.

+
+
+
+ +
+
+deregister_table(name: str) None
+

Remove a table from the session.

+
+ +
+
+empty_table() datafusion.dataframe.DataFrame
+

Create an empty DataFrame.

+
+ +
+
+enable_url_table() SessionContext
+

Control if local files can be queried as tables.

+
+
Returns:
+

A new SessionContext object with url table enabled.

+
+
+
+ +
+
+execute(plan: datafusion.plan.ExecutionPlan, partitions: int) datafusion.record_batch.RecordBatchStream
+

Execute the plan and return the results.

+
+ +
+
+from_arrow(data: ArrowStreamExportable | ArrowArrayExportable, name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from an Arrow source.

+

The Arrow data source can be any object that implements either +__arrow_c_stream__ or __arrow_c_array__. For the latter, it must return +a struct array.

+

Arrow data can be Polars, Pandas, Pyarrow etc.

+
+
Parameters:
+
    +
  • data – Arrow data source.

  • +
  • name – Name of the DataFrame.

  • +
+
+
Returns:
+

DataFrame representation of the Arrow table.

+
+
+
+ +
+
+from_arrow_table(data: pyarrow.Table, name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from an Arrow table.

+

This is an alias for from_arrow().

+
+ +
+
+from_pandas(data: pandas.DataFrame, name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from a Pandas DataFrame.

+
+
Parameters:
+
    +
  • data – Pandas DataFrame.

  • +
  • name – Name of the DataFrame.

  • +
+
+
Returns:
+

DataFrame representation of the Pandas DataFrame.

+
+
+
+ +
+
+from_polars(data: polars.DataFrame, name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from a Polars DataFrame.

+
+
Parameters:
+
    +
  • data – Polars DataFrame.

  • +
  • name – Name of the DataFrame.

  • +
+
+
Returns:
+

DataFrame representation of the Polars DataFrame.

+
+
+
+ +
+
+from_pydict(data: dict[str, list[Any]], name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from a dictionary.

+
+
Parameters:
+
    +
  • data – Dictionary of lists.

  • +
  • name – Name of the DataFrame.

  • +
+
+
Returns:
+

DataFrame representation of the dictionary of lists.

+
+
+
+ +
+
+from_pylist(data: list[dict[str, Any]], name: str | None = None) datafusion.dataframe.DataFrame
+

Create a DataFrame from a list.

+
+
Parameters:
+
    +
  • data – List of dictionaries.

  • +
  • name – Name of the DataFrame.

  • +
+
+
Returns:
+

DataFrame representation of the list of dictionaries.

+
+
+
+ +
+
+classmethod global_ctx() SessionContext
+

Retrieve the global context as a SessionContext wrapper.

+
+
Returns:
+

A SessionContext object that wraps the global SessionContextInternal.

+
+
+
+ +
+
+read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') datafusion.dataframe.DataFrame
+

Create a DataFrame for reading Avro data source.

+
+
Parameters:
+
    +
  • path – Path to the Avro file.

  • +
  • schema – The data source schema.

  • +
  • file_partition_cols – Partition columns.

  • +
  • file_extension – File extension to select.

  • +
+
+
Returns:
+

DataFrame representation of the read Avro file

+
+
+
+ +
+
+read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) datafusion.dataframe.DataFrame
+

Read a CSV data source.

+
+
Parameters:
+
    +
  • path – Path to the CSV file

  • +
  • schema – An optional schema representing the CSV files. If None, the +CSV reader will try to infer it based on data in file.

  • +
has_header – Whether the CSV file has a header. If schema inference +is run on a file with no headers, default column names are +created.

  • +
  • delimiter – An optional column delimiter.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
  • options – Set advanced options for CSV reading. This cannot be +combined with any of the other options in this method.

  • +
+
+
Returns:
+

DataFrame representation of the read CSV files

+
+
+
+ +
+
+read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) datafusion.dataframe.DataFrame
+

Read a line-delimited JSON data source.

+
+
Parameters:
+
    +
  • path – Path to the JSON file.

  • +
  • schema – The data source schema.

  • +
  • schema_infer_max_records – Maximum number of rows to read from JSON +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
+
+
Returns:
+

DataFrame representation of the read JSON files.

+
+
+
+ +
+
+read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) datafusion.dataframe.DataFrame
+

Read a Parquet source into a DataFrame.

+
+
Parameters:
+
    +
  • path – Path to the Parquet file.

  • +
  • table_partition_cols – Partition columns.

  • +
  • parquet_pruning – Whether the parquet reader should use the predicate +to prune row groups.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • skip_metadata – Whether the parquet reader should skip any metadata +that may be in the file schema. This can help avoid schema +conflicts due to metadata.

  • +
  • schema – An optional schema representing the parquet files. If None, +the parquet reader will try to infer it based on data in the +file.

  • +
  • file_sort_order – Sort order for the file. Each sort key can be +specified as a column name (str), an expression +(Expr), or a SortExpr.

  • +
+
+
Returns:
+

DataFrame representation of the read Parquet files

+
+
+
+ +
+
+read_table(table: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) datafusion.dataframe.DataFrame
+

Creates a DataFrame from a table.

+
+ +
+
+register_avro(name: str, path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_extension: str = '.avro', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None) None
+

Register an Avro file as a table.

+

The registered table can be referenced from SQL statement executed against +this context.

+
+
Parameters:
+
    +
  • name – Name of the table to register.

  • +
  • path – Path to the Avro file.

  • +
  • schema – The data source schema.

  • +
  • file_extension – File extension to select.

  • +
  • table_partition_cols – Partition columns.

  • +
+
+
+
+ +
+
+register_catalog_provider(name: str, provider: datafusion.catalog.CatalogProviderExportable | datafusion.catalog.CatalogProvider | datafusion.catalog.Catalog) None
+

Register a catalog provider.

+
+ +
+
+register_catalog_provider_list(provider: datafusion.catalog.CatalogProviderListExportable | datafusion.catalog.CatalogProviderList | datafusion.catalog.CatalogList) None
+

Register a catalog provider list.

+
+ +
+
+register_csv(name: str, path: str | pathlib.Path | list[str | pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) None
+

Register a CSV file as a table.

+

The registered table can be referenced from SQL statements executed against +this context.

+
+
Parameters:
+
    +
  • name – Name of the table to register.

  • +
  • path – Path to the CSV file. It also accepts a list of Paths.

  • +
  • schema – An optional schema representing the CSV file. If None, the +CSV reader will try to infer it based on data in file.

  • +
has_header – Whether the CSV file has a header. If schema inference +is run on a file with no headers, default column names are +created.

  • +
  • delimiter – An optional column delimiter.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • file_compression_type – File compression type.

  • +
  • options – Set advanced options for CSV reading. This cannot be +combined with any of the other options in this method.

  • +
+
+
+
+ +
+
+register_dataset(name: str, dataset: pyarrow.dataset.Dataset) None
+

Register a pa.dataset.Dataset as a table.

+
+
Parameters:
+
    +
  • name – Name of the table to register.

  • +
  • dataset – PyArrow dataset.

  • +
+
+
+
+ +
+
+register_json(name: str, path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) None
+

Register a JSON file as a table.

+

The registered table can be referenced from SQL statement executed +against this context.

+
+
Parameters:
+
    +
  • name – Name of the table to register.

  • +
  • path – Path to the JSON file.

  • +
  • schema – The data source schema.

  • +
  • schema_infer_max_records – Maximum number of rows to read from JSON +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
+
+
+
+ +
+
+register_listing_table(name: str, path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.parquet', schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) None
+

Register multiple files as a single table.

+

Registers a Table that can assemble multiple +files from locations in an ObjectStore +instance.

+
+
Parameters:
+
    +
  • name – Name of the resultant table.

  • +
  • path – Path to the file to register.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_extension – File extension of the provided table.

  • +
  • schema – The data source schema.

  • +
  • file_sort_order – Sort order for the file. Each sort key can be +specified as a column name (str), an expression +(Expr), or a SortExpr.

  • +
+
+
+
+ +
+
+register_object_store(schema: str, store: Any, host: str | None = None) None
+

Add a new object store into the session.

+
+
Parameters:
+
    +
  • schema – The data source schema.

  • +
  • store – The ObjectStore to register.

  • +
  • host – URL for the host.

  • +
+
+
+
+ +
+
+register_parquet(name: str, path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: collections.abc.Sequence[collections.abc.Sequence[datafusion.expr.SortKey]] | None = None) None
+

Register a Parquet file as a table.

+

The registered table can be referenced from SQL statements executed +against this context.

+
+
Parameters:
+
    +
  • name – Name of the table to register.

  • +
  • path – Path to the Parquet file.

  • +
  • table_partition_cols – Partition columns.

  • +
  • parquet_pruning – Whether the parquet reader should use the +predicate to prune row groups.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • skip_metadata – Whether the parquet reader should skip any metadata +that may be in the file schema. This can help avoid schema +conflicts due to metadata.

  • +
  • schema – The data source schema.

  • +
  • file_sort_order – Sort order for the file. Each sort key can be +specified as a column name (str), an expression +(Expr), or a SortExpr.

  • +
+
+
+
+ +
+
+register_record_batches(name: str, partitions: list[list[pyarrow.RecordBatch]]) None
+

Register record batches as a table.

+

This function will convert the provided partitions into a table and +register it into the session using the given name.

+
+
Parameters:
+
    +
  • name – Name of the resultant table.

  • +
  • partitions – Record batches to register as a table.

  • +
+
+
+
+ +
+
+register_table(name: str, table: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) None
+

Register a Table with this context.

+

The registered table can be referenced from SQL statements executed against +this context.

+
+
Parameters:
+
    +
  • name – Name of the resultant table.

  • +
  • table – Any object that can be converted into a Table.

  • +
+
+
+
+ +
+
+register_table_provider(name: str, provider: datafusion.catalog.Table | TableProviderExportable | datafusion.dataframe.DataFrame | pyarrow.dataset.Dataset) None
+

Register a table provider.

+

Deprecated: use register_table() instead.

+
+ +
+
+register_udaf(udaf: datafusion.user_defined.AggregateUDF) None
+

Register a user-defined aggregation function (UDAF) with the context.

+
+ +
+
+register_udf(udf: datafusion.user_defined.ScalarUDF) None
+

Register a user-defined function (UDF) with the context.

+
+ +
+
+register_udtf(func: datafusion.user_defined.TableFunction) None
+

Register a user-defined table function.

+
+ +
+
+register_udwf(udwf: datafusion.user_defined.WindowUDF) None
+

Register a user-defined window function (UDWF) with the context.

+
+ +
+
+register_view(name: str, df: datafusion.dataframe.DataFrame) None
+

Register a DataFrame as a view.

+
+
Parameters:
+
    +
  • name (str) – The name to register the view under.

  • +
  • df (DataFrame) – The DataFrame to be converted into a view and registered.

  • +
+
+
+
+ +
+
+session_id() str
+

Return an id that uniquely identifies this SessionContext.

+
+ +
+
+sql(query: str, options: SQLOptions | None = None, param_values: dict[str, Any] | None = None, **named_params: Any) datafusion.dataframe.DataFrame
+

Create a DataFrame from SQL query text.

+

See the online documentation for a description of how to perform +parameterized substitution via either the param_values option +or passing in named_params.

+

Note: This API implements DDL statements such as CREATE TABLE and +CREATE VIEW and DML statements such as INSERT INTO with an in-memory +default implementation. See +sql_with_options().

+
+
Parameters:
+
    +
  • query – SQL query text.

  • +
  • options – If provided, the query will be validated against these options.

  • +
  • param_values – Provides substitution of scalar values in the query +after parsing.

  • +
  • named_params – Provides string or DataFrame substitution in the query string.

  • +
+
+
Returns:
+

DataFrame representation of the SQL query.

+
+
+
+ +
+
+sql_with_options(query: str, options: SQLOptions, param_values: dict[str, Any] | None = None, **named_params: Any) datafusion.dataframe.DataFrame
+

Create a DataFrame from SQL query text.

+

This function will first validate that the query is allowed by the +provided options.

+
+
Parameters:
+
    +
  • query – SQL query text.

  • +
  • options – SQL options.

  • +
  • param_values – Provides substitution of scalar values in the query +after parsing.

  • +
  • named_params – Provides string or DataFrame substitution in the query string.

  • +
+
+
Returns:
+

DataFrame representation of the SQL query.

+
+
+
+ +
+
+table(name: str) datafusion.dataframe.DataFrame
+

Retrieve a previously registered table by name.

+
+ +
+
+table_exist(name: str) bool
+

Return whether a table with the given name exists.

+
+ +
+
+with_logical_extension_codec(codec: Any) SessionContext
+

Create a new session context with specified codec.

+

This only supports codecs that have been implemented using the +FFI interface.

+
+ +
+
+ctx
+
+ +
+ +
+
+class datafusion.context.TableProviderExportable
+

Bases: Protocol

+

Type hint for object that has __datafusion_table_provider__ PyCapsule.

+

https://datafusion.apache.org/python/user-guide/io/table_provider.html

+
+
+__datafusion_table_provider__(session: Any) object
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/dataframe/index.html b/autoapi/datafusion/dataframe/index.html new file mode 100644 index 000000000..75b0c0c40 --- /dev/null +++ b/autoapi/datafusion/dataframe/index.html @@ -0,0 +1,2842 @@ + + + + + + + + datafusion.dataframe — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.dataframe

+

DataFrame is one of the core concepts in DataFusion.

+

See Concepts in the online documentation for more information.

+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + +

Compression

Enum representing the available compression types for Parquet files.

DataFrame

Two dimensional table representation of data.

DataFrameWriteOptions

Writer options for DataFrame.

InsertOp

Insert operation mode.

ParquetColumnOptions

Parquet options for individual columns.

ParquetWriterOptions

Advanced parquet writer options.

+
+
+

Module Contents

+
+
+class datafusion.dataframe.Compression
+

Bases: enum.Enum

+

Enum representing the available compression types for Parquet files.

+
+
+classmethod from_str(value: str) Compression
+

Convert a string to a Compression enum value.

+
+
Parameters:
+

value – The string representation of the compression type.

+
+
Returns:
+

The Compression enum lowercase value.

+
+
Raises:
+

ValueError – If the string does not match any Compression enum value.

+
+
+
+ +
+
+get_default_level() int | None
+

Get the default compression level for the compression type.

+
+
Returns:
+

The default compression level for the compression type.

+
+
+
+ +
+
+BROTLI = 'brotli'
+
+ +
+
+GZIP = 'gzip'
+
+ +
+
+LZ4 = 'lz4'
+
+ +
+
+LZ4_RAW = 'lz4_raw'
+
+ +
+
+SNAPPY = 'snappy'
+
+ +
+
+UNCOMPRESSED = 'uncompressed'
+
+ +
+
+ZSTD = 'zstd'
+
+ +
+ +
+
+class datafusion.dataframe.DataFrame(df: datafusion._internal.DataFrame)
+

Two dimensional table representation of data.

+

DataFrame objects are iterable; iterating over a DataFrame yields +datafusion.RecordBatch instances lazily.

+

See Concepts in the online documentation for more information.

+

This constructor is not to be used by the end user.

+

See SessionContext for methods to +create a DataFrame.

+
+
+__aiter__() collections.abc.AsyncIterator[datafusion.record_batch.RecordBatch]
+

Return an async iterator over this DataFrame’s record batches.

+

We’re using __aiter__ because we support Python < 3.10 where aiter() is not +available.

+
+ +
+
+__arrow_c_stream__(requested_schema: object | None = None) object
+

Export the DataFrame as an Arrow C Stream.

+

The DataFrame is executed using DataFusion’s streaming APIs and exposed via +Arrow’s C Stream interface. Record batches are produced incrementally, so the +full result set is never materialized in memory.

+

When requested_schema is provided, DataFusion applies only simple +projections such as selecting a subset of existing columns or reordering +them. Column renaming, computed expressions, or type coercion are not +supported through this interface.

+
+
Parameters:
+

requested_schema – Either a pyarrow.Schema or an Arrow C +Schema capsule (PyCapsule) produced by +schema._export_to_c_capsule(). The DataFrame will attempt to +align its output with the fields and order specified by this schema.

+
+
Returns:
+

Arrow PyCapsule object representing an ArrowArrayStream.

+
+
+

For practical usage patterns, see the Apache Arrow streaming +documentation: https://arrow.apache.org/docs/python/ipc.html#streaming.

+

For details on DataFusion’s Arrow integration and DataFrame streaming, +see the user guide (user-guide/io/arrow and user-guide/dataframe/index).

+

Notes

+

The Arrow C Data Interface PyCapsule details are documented by Apache +Arrow and can be found at: +https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

+
+ +
+
+__getitem__(key: str | list[str]) DataFrame
+

Return a new DataFrame with the specified column or columns.

+
+
Parameters:
+

key – Column name or list of column names to select.

+
+
Returns:
+

DataFrame with the specified column or columns.

+
+
+
+ +
+
+__iter__() collections.abc.Iterator[datafusion.record_batch.RecordBatch]
+

Return an iterator over this DataFrame’s record batches.

+
+ +
+
+__repr__() str
+

Return a string representation of the DataFrame.

+
+
Returns:
+

String representation of the DataFrame.

+
+
+
+ +
+
+_repr_html_() str
+
+ +
+
+aggregate(group_by: collections.abc.Sequence[datafusion.expr.Expr | str] | datafusion.expr.Expr | str, aggs: collections.abc.Sequence[datafusion.expr.Expr] | datafusion.expr.Expr) DataFrame
+

Aggregates the rows of the current DataFrame.

+
+
Parameters:
+
    +
  • group_by – Sequence of expressions or column names to group by.

  • +
  • aggs – Sequence of expressions to aggregate.

  • +
+
+
Returns:
+

DataFrame after aggregation.

+
+
+
+ +
+
+cache() DataFrame
+

Cache the DataFrame as a memory table.

+
+
Returns:
+

Cached DataFrame.

+
+
+
+ +
+
+cast(mapping: dict[str, pyarrow.DataType[Any]]) DataFrame
+

Cast one or more columns to a different data type.

+
+
Parameters:
+

mapping – Mapping with the column name as key and the column dtype as value.

+
+
Returns:
+

DataFrame after casting columns

+
+
+
+ +
+
+collect() list[pyarrow.RecordBatch]
+

Execute this DataFrame and collect results into memory.

+

Prior to calling collect, modifying a DataFrame simply updates a plan +(no actual computation is performed). Calling collect triggers the +computation.

+
+
Returns:
+

List of pyarrow.RecordBatch collected from the DataFrame.

+
+
+
+ +
+
+collect_column(column_name: str) pyarrow.Array | pyarrow.ChunkedArray
+

Executes this DataFrame for a single column.

+
+ +
+
+collect_partitioned() list[list[pyarrow.RecordBatch]]
+

Execute this DataFrame and collect all partitioned results.

+

This operation returns pyarrow.RecordBatch maintaining the input +partitioning.

+
+
Returns:
+

+
List of list of RecordBatch collected from the

DataFrame.

+
+
+

+
+
+
+ +
+
+count() int
+

Return the total number of rows in this DataFrame.

+

Note that this method will actually run a plan to calculate the +count, which may be slow for large or complicated DataFrames.

+
+
Returns:
+

Number of rows in the DataFrame.

+
+
+
+ +
+
+static default_str_repr(batches: list[pyarrow.RecordBatch], schema: pyarrow.Schema, has_more: bool, table_uuid: str | None = None) str
+

Return the default string representation of a DataFrame.

+

This method is used by the default formatter and implemented in Rust for +performance reasons.

+
+ +
+
+describe() DataFrame
+

Return the statistics for this DataFrame.

+

Only summarizes numeric datatypes at the moment and returns nulls +for non-numeric datatypes.

+

The output format is modeled after pandas.

+
+
Returns:
+

A summary DataFrame containing statistics.

+
+
+
+ +
+
+distinct() DataFrame
+

Return a new DataFrame with all duplicated rows removed.

+
+
Returns:
+

DataFrame after removing duplicates.

+
+
+
+ +
+
+drop(*columns: str) DataFrame
+

Drop an arbitrary number of columns.

+

Column names are case-sensitive and do not require double quotes like +other operations such as select. Leading and trailing double quotes +are allowed and will be automatically stripped if present.

+
+
Parameters:
+

columns – Column names to drop from the dataframe. Both column_name +and "column_name" are accepted.

+
+
Returns:
+

DataFrame with those columns removed in the projection.

+
+
+

Example Usage:

+
df.drop('ID_For_Students')      # Works
+df.drop('"ID_For_Students"')    # Also works (quotes stripped)
+
+
+
+ +
+
+except_all(other: DataFrame) DataFrame
+

Calculate the exception of two DataFrame.

+

The two DataFrame must have exactly the same schema.

+
+
Parameters:
+

other – DataFrame to calculate exception with.

+
+
Returns:
+

DataFrame after exception.

+
+
+
+ +
+
+execute_stream() datafusion.record_batch.RecordBatchStream
+

Executes this DataFrame and returns a stream over a single partition.

+
+
Returns:
+

Record Batch Stream over a single partition.

+
+
+
+ +
+
+execute_stream_partitioned() list[datafusion.record_batch.RecordBatchStream]
+

Executes this DataFrame and returns a stream for each partition.

+
+
Returns:
+

One record batch stream per partition.

+
+
+
+ +
+
+execution_plan() datafusion.plan.ExecutionPlan
+

Return the execution/physical plan.

+
+
Returns:
+

Execution plan.

+
+
+
+ +
+
+explain(verbose: bool = False, analyze: bool = False) None
+

Print an explanation of the DataFrame’s plan so far.

+

If analyze is specified, runs the plan and reports metrics.

+
+
Parameters:
+
    +
  • verbose – If True, more details will be included.

  • +
  • analyze – If True, the plan will run and metrics reported.

  • +
+
+
+
+ +
+
+fill_null(value: Any, subset: list[str] | None = None) DataFrame
+

Fill null values in specified columns with a value.

+
+
Parameters:
+
    +
  • value – Value to replace nulls with. Will be cast to match column type.

  • +
  • subset – Optional list of column names to fill. If None, fills all columns.

  • +
+
+
Returns:
+

DataFrame with null values replaced where type casting is possible

+
+
+

Examples

+
>>> df = df.fill_null(0)  # Fill all nulls with 0 where possible
+>>> # Fill nulls in specific string columns
+>>> df = df.fill_null("missing", subset=["name", "category"])
+
+
+

Notes

+
    +
  • Only fills nulls in columns where the value can be cast to the column type

  • +
  • For columns where casting fails, the original column is kept unchanged

  • +
  • For columns not in subset, the original column is kept unchanged

  • +
+
+ +
+
+filter(*predicates: datafusion.expr.Expr | str) DataFrame
+

Return a DataFrame for which predicate evaluates to True.

+

Rows for which predicate evaluates to False or None are filtered +out. If more than one predicate is provided, these predicates will be +combined as a logical AND. Each predicate can be an +Expr created using helper functions such as +datafusion.col() or datafusion.lit(), or a SQL expression string +that will be parsed against the DataFrame schema. If more complex logic is +required, see the logical operations in functions.

+

Example:

+
from datafusion import col, lit
+df.filter(col("a") > lit(1))
+df.filter("a > 1")
+
+
+
+
Parameters:
+

predicates – Predicate expression(s) or SQL strings to filter the DataFrame.

+
+
Returns:
+

DataFrame after filtering.

+
+
+
+ +
+
+head(n: int = 5) DataFrame
+

Return a new DataFrame with a limited number of rows.

+
+
Parameters:
+

n – Number of rows to take from the head of the DataFrame.

+
+
Returns:
+

DataFrame after limiting.

+
+
+
+ +
+
+intersect(other: DataFrame) DataFrame
+

Calculate the intersection of two DataFrame.

+

The two DataFrame must have exactly the same schema.

+
+
Parameters:
+

other – DataFrame to intersect with.

+
+
Returns:
+

DataFrame after intersection.

+
+
+
+ +
+
+into_view(temporary: bool = False) datafusion.catalog.Table
+

Convert DataFrame into a Table.

+

Examples

+
>>> from datafusion import SessionContext
+>>> ctx = SessionContext()
+>>> df = ctx.sql("SELECT 1 AS value")
+>>> view = df.into_view()
+>>> ctx.register_table("values_view", view)
+>>> df.collect()  # The DataFrame is still usable
+>>> ctx.sql("SELECT value FROM values_view").collect()
+
+
+
+ +
+
+join(right: DataFrame, on: str | collections.abc.Sequence[str], how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, left_on: None = None, right_on: None = None, join_keys: None = None, coalesce_duplicate_keys: bool = True) DataFrame
+
+join(right: DataFrame, on: None = None, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, left_on: str | collections.abc.Sequence[str], right_on: str | collections.abc.Sequence[str], join_keys: tuple[list[str], list[str]] | None = None, coalesce_duplicate_keys: bool = True) DataFrame
+
+join(right: DataFrame, on: None = None, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner', *, join_keys: tuple[list[str], list[str]], left_on: None = None, right_on: None = None, coalesce_duplicate_keys: bool = True) DataFrame
+

Join this DataFrame with another DataFrame.

+

Either on must be provided, or both left_on and right_on must be provided together.

+
+
Parameters:
+
    +
  • right – Other DataFrame to join with.

  • +
  • on – Column names to join on in both dataframes.

  • +
  • how – Type of join to perform. Supported types are “inner”, “left”, +“right”, “full”, “semi”, “anti”.

  • +
  • left_on – Join column of the left dataframe.

  • +
  • right_on – Join column of the right dataframe.

  • +
  • coalesce_duplicate_keys – When True, coalesce the columns +from the right DataFrame and left DataFrame +that have identical names in the on fields.

  • +
  • join_keys – Tuple of two lists of column names to join on. [Deprecated]

  • +
+
+
Returns:
+

DataFrame after join.

+
+
+
+ +
+
+join_on(right: DataFrame, *on_exprs: datafusion.expr.Expr, how: Literal['inner', 'left', 'right', 'full', 'semi', 'anti'] = 'inner') DataFrame
+

Join two DataFrame using the specified expressions.

+

Join predicates must be Expr objects, typically +built with datafusion.col(). On expressions are used to support +inequality predicates. Equality predicates are correctly optimized.

+

Example:

+
from datafusion import col
+df.join_on(other_df, col("id") == col("other_id"))
+
+
+
+
Parameters:
+
    +
  • right – Other DataFrame to join with.

  • +
  • on_exprs – single or multiple (in)-equality predicates.

  • +
  • how – Type of join to perform. Supported types are “inner”, “left”, +“right”, “full”, “semi”, “anti”.

  • +
+
+
Returns:
+

DataFrame after join.

+
+
+
+ +
+
+limit(count: int, offset: int = 0) DataFrame
+

Return a new DataFrame with a limited number of rows.

+
+
Parameters:
+
    +
  • count – Number of rows to limit the DataFrame to.

  • +
  • offset – Number of rows to skip.

  • +
+
+
Returns:
+

DataFrame after limiting.

+
+
+
+ +
+
+logical_plan() datafusion.plan.LogicalPlan
+

Return the unoptimized LogicalPlan.

+
+
Returns:
+

Unoptimized logical plan.

+
+
+
+ +
+
+optimized_logical_plan() datafusion.plan.LogicalPlan
+

Return the optimized LogicalPlan.

+
+
Returns:
+

Optimized logical plan.

+
+
+
+ +
+
+parse_sql_expr(expr: str) datafusion.expr.Expr
+

Creates logical expression from a SQL query text.

+

The expression is created and processed against the current schema.

+

Example:

+
from datafusion import col, lit
+df.parse_sql_expr("a > 1")
+
+should produce:
+
+col("a") > lit(1)
+
+
+
+
Parameters:
+

expr – Expression string to be converted to datafusion expression

+
+
Returns:
+

Logical expression.

+
+
+
+ +
+
+repartition(num: int) DataFrame
+

Repartition a DataFrame into num partitions.

+

The batches allocation uses a round-robin algorithm.

+
+
Parameters:
+

num – Number of partitions to repartition the DataFrame into.

+
+
Returns:
+

Repartitioned DataFrame.

+
+
+
+ +
+
+repartition_by_hash(*exprs: datafusion.expr.Expr | str, num: int) DataFrame
+

Repartition a DataFrame using a hash partitioning scheme.

+
+
Parameters:
+
    +
  • exprs – Expressions or a SQL expression string to evaluate +and perform hashing on.

  • +
  • num – Number of partitions to repartition the DataFrame into.

  • +
+
+
Returns:
+

Repartitioned DataFrame.

+
+
+
+ +
+
+schema() pyarrow.Schema
+

Return the pyarrow.Schema of this DataFrame.

+

The output schema contains information on the name, data type, and +nullability for each column.

+
+
Returns:
+

Schema describing the DataFrame.

+
+
+
+ +
+
+select(*exprs: datafusion.expr.Expr | str) DataFrame
+

Project arbitrary expressions into a new DataFrame.

+
+
Parameters:
+

exprs – Either column names or Expr to select.

+
+
Returns:
+

DataFrame after projection. It has one column for each expression.

+
+
+

Example usage:

+

The following example will return 3 columns from the original dataframe. +The first two columns will be the original columns a and b since the +string “a” is assumed to refer to column selection. Also a duplicate of +column a will be returned with the column name alternate_a:

+
df = df.select("a", col("b"), col("a").alias("alternate_a"))
+
+
+
+ +
+
+select_columns(*args: str) DataFrame
+

Filter the DataFrame by columns.

+
+
Returns:
+

DataFrame only containing the specified columns.

+
+
+
+ +
+
+select_exprs(*args: str) DataFrame
+

Project arbitrary list of expression strings into a new DataFrame.

+

This method will parse string expressions into logical plan expressions. +The output DataFrame has one column for each expression.

+
+
Returns:
+

DataFrame only containing the specified columns.

+
+
+
+ +
+
+show(num: int = 20) None
+

Execute the DataFrame and print the result to the console.

+
+
Parameters:
+

num – Number of lines to show.

+
+
+
+ +
+
+sort(*exprs: datafusion.expr.SortKey) DataFrame
+

Sort the DataFrame by the specified sorting expressions or column names.

+

Note that any expression can be turned into a sort expression by +calling its sort method.

+
+
Parameters:
+

exprs – Sort expressions or column names, applied in order.

+
+
Returns:
+

DataFrame after sorting.

+
+
+
+ +
+
+tail(n: int = 5) DataFrame
+

Return a new DataFrame with a limited number of rows.

+

Be aware this could be potentially expensive since the number of rows in the +DataFrame needs to be determined. This is done by collecting it.

+
+
Parameters:
+

n – Number of rows to take from the tail of the DataFrame.

+
+
Returns:
+

DataFrame after limiting.

+
+
+
+ +
+
+to_arrow_table() pyarrow.Table
+

Execute the DataFrame and convert it into an Arrow Table.

+
+
Returns:
+

Arrow Table.

+
+
+
+ +
+
+to_pandas() pandas.DataFrame
+

Execute the DataFrame and convert it into a Pandas DataFrame.

+
+
Returns:
+

Pandas DataFrame.

+
+
+
+ +
+
+to_polars() polars.DataFrame
+

Execute the DataFrame and convert it into a Polars DataFrame.

+
+
Returns:
+

Polars DataFrame.

+
+
+
+ +
+
+to_pydict() dict[str, list[Any]]
+

Execute the DataFrame and convert it into a dictionary of lists.

+
+
Returns:
+

Dictionary of lists.

+
+
+
+ +
+
+to_pylist() list[dict[str, Any]]
+

Execute the DataFrame and convert it into a list of dictionaries.

+
+
Returns:
+

List of dictionaries.

+
+
+
+ +
+
+transform(func: collections.abc.Callable[Ellipsis, DataFrame], *args: Any) DataFrame
+

Apply a function to the current DataFrame which returns another DataFrame.

+

This is useful for chaining together multiple functions. For example:

+
def add_3(df: DataFrame) -> DataFrame:
+    return df.with_column("modified", lit(3))
+
+def within_limit(df: DataFrame, limit: int) -> DataFrame:
+    return df.filter(col("a") < lit(limit)).distinct()
+
+df = df.transform(add_3).transform(within_limit, 4)
+
+
+
+
Parameters:
+
    +
  • func – A callable function that takes a DataFrame as its first argument

  • +
  • args – Zero or more arguments to pass to func

  • +
+
+
Returns:
+

After applying func to the original dataframe.

+
+
Return type:
+

DataFrame

+
+
+
+ +
+
+union(other: DataFrame, distinct: bool = False) DataFrame
+

Calculate the union of two DataFrame.

+

The two DataFrame must have exactly the same schema.

+
+
Parameters:
+
    +
  • other – DataFrame to union with.

  • +
  • distinct – If True, duplicate rows will be removed.

  • +
+
+
Returns:
+

DataFrame after union.

+
+
+
+ +
+
+union_distinct(other: DataFrame) DataFrame
+

Calculate the distinct union of two DataFrame.

+

The two DataFrame must have exactly the same schema. +Any duplicate rows are discarded.

+
+
Parameters:
+

other – DataFrame to union with.

+
+
Returns:
+

DataFrame after union.

+
+
+
+ +
+
+unnest_columns(*columns: str, preserve_nulls: bool = True) DataFrame
+

Expand columns of arrays into a single row per array element.

+
+
Parameters:
+
    +
  • columns – Column names to perform unnest operation on.

  • +
  • preserve_nulls – If False, rows with null entries will not be +returned.

  • +
+
+
Returns:
+

A DataFrame with the columns expanded.

+
+
+
+ +
+
+with_column(name: str, expr: datafusion.expr.Expr | str) DataFrame
+

Add an additional column to the DataFrame.

+

The expr must be an Expr constructed with +datafusion.col() or datafusion.lit(), or a SQL expression +string that will be parsed against the DataFrame schema.

+

Example:

+
from datafusion import col, lit
+df.with_column("b", col("a") + lit(1))
+
+
+
+
Parameters:
+
    +
  • name – Name of the column to add.

  • +
  • expr – Expression to compute the column.

  • +
+
+
Returns:
+

DataFrame with the new column.

+
+
+
+ +
+
+with_column_renamed(old_name: str, new_name: str) DataFrame
+

Rename one column by applying a new projection.

+

This is a no-op if the column to be renamed does not exist.

+

The method supports case-sensitive renaming by wrapping the column name +in one of the following symbols: " or ' or `.

+
+
Parameters:
+
    +
  • old_name – Old column name.

  • +
  • new_name – New column name.

  • +
+
+
Returns:
+

DataFrame with the column renamed.

+
+
+
+ +
+
+with_columns(*exprs: datafusion.expr.Expr | str | collections.abc.Iterable[datafusion.expr.Expr | str], **named_exprs: datafusion.expr.Expr | str) DataFrame
+

Add columns to the DataFrame.

+

By passing expressions, iterables of expressions, string SQL expressions, +or named expressions. +All expressions must be Expr objects created via +datafusion.col() or datafusion.lit(), or SQL expression strings. +To pass named expressions use the form name=Expr.

+

Example usage: The following will add 4 columns labeled a, b, c, +and d:

+
from datafusion import col, lit
+df = df.with_columns(
+    col("x").alias("a"),
+    [lit(1).alias("b"), col("y").alias("c")],
+    d=lit(3)
+)
+
+Equivalent example using just SQL strings:
+
+df = df.with_columns(
+    "x as a",
+    ["1 as b", "y as c"],
+    d="3"
+)
+
+
+
+
Parameters:
+
    +
  • exprs – Either a single expression, an iterable of expressions to add or +SQL expression strings.

  • +
  • named_exprs – Named expressions in the form of name=expr

  • +
+
+
Returns:
+

DataFrame with the new columns added.

+
+
+
+ +
+
+write_csv(path: str | pathlib.Path, with_header: bool = False, write_options: DataFrameWriteOptions | None = None) None
+

Execute the DataFrame and write the results to a CSV file.

+
+
Parameters:
+
    +
  • path – Path of the CSV file to write.

  • +
  • with_header – If true, output the CSV header row.

  • +
  • write_options – Options that impact how the DataFrame is written.

  • +
+
+
+
+ +
+
+write_json(path: str | pathlib.Path, write_options: DataFrameWriteOptions | None = None) None
+

Execute the DataFrame and write the results to a JSON file.

+
+
Parameters:
+
    +
  • path – Path of the JSON file to write.

  • +
  • write_options – Options that impact how the DataFrame is written.

  • +
+
+
+
+ +
+
+write_parquet(path: str | pathlib.Path, compression: str, compression_level: int | None = None, write_options: DataFrameWriteOptions | None = None) None
+
+write_parquet(path: str | pathlib.Path, compression: Compression = Compression.ZSTD, compression_level: int | None = None, write_options: DataFrameWriteOptions | None = None) None
+
+write_parquet(path: str | pathlib.Path, compression: ParquetWriterOptions, compression_level: None = None, write_options: DataFrameWriteOptions | None = None) None
+

Execute the DataFrame and write the results to a Parquet file.

+

Available compression types are:

+
    +
  • “uncompressed”: No compression.

  • +
  • “snappy”: Snappy compression.

  • +
  • “gzip”: Gzip compression.

  • +
  • “brotli”: Brotli compression.

  • +
  • “lz4”: LZ4 compression.

  • +
  • “lz4_raw”: LZ4_RAW compression.

  • +
  • “zstd”: Zstandard compression.

  • +
+

LZO compression is not yet implemented in arrow-rs and is therefore +excluded.

+
+
Parameters:
+
    +
  • path – Path of the Parquet file to write.

  • +
  • compression – Compression type to use. Default is “ZSTD”.

  • +
  • compression_level – Compression level to use. For ZSTD, the +recommended range is 1 to 22, with the default being 4. Higher levels +provide better compression but slower speed.

  • +
  • write_options – Options that impact how the DataFrame is written.

  • +
+
+
+
+ +
+
+write_parquet_with_options(path: str | pathlib.Path, options: ParquetWriterOptions, write_options: DataFrameWriteOptions | None = None) None
+

Execute the DataFrame and write the results to a Parquet file.

+

Allows advanced writer options to be set with ParquetWriterOptions.

+
+
Parameters:
+
    +
  • path – Path of the Parquet file to write.

  • +
  • options – Sets the writer parquet options (see ParquetWriterOptions).

  • +
  • write_options – Options that impact how the DataFrame is written.

  • +
+
+
+
+ +
+
+write_table(table_name: str, write_options: DataFrameWriteOptions | None = None) None
+

Execute the DataFrame and write the results to a table.

+

The table must be registered with the session to perform this operation. +Not all table providers support writing operations. See the individual +implementations for details.

+
+ +
+
+df
+
+ +
+ +
+
+class datafusion.dataframe.DataFrameWriteOptions(insert_operation: InsertOp | None = None, single_file_output: bool = False, partition_by: str | collections.abc.Sequence[str] | None = None, sort_by: datafusion.expr.Expr | datafusion.expr.SortExpr | collections.abc.Sequence[datafusion.expr.Expr] | collections.abc.Sequence[datafusion.expr.SortExpr] | None = None)
+

Writer options for DataFrame.

+

There is no guarantee the table provider supports all writer options. +See the individual implementation and documentation for details.

+

Instantiate writer options for DataFrame.

+
+
+_raw_write_options
+
+ +
+ +
+
+class datafusion.dataframe.InsertOp
+

Bases: enum.Enum

+

Insert operation mode.

+

These modes are used by the table writing feature to define how record +batches should be written to a table.

+
+
+APPEND
+

Appends new rows to the existing table without modifying any existing rows.

+
+ +
+
+OVERWRITE
+

Overwrites all existing rows in the table with the new rows.

+
+ +
+
+REPLACE
+

Replace existing rows that collide with the inserted rows.

+

Replacement is typically based on a unique key or primary key.

+
+ +
+ +
+
+class datafusion.dataframe.ParquetColumnOptions(encoding: str | None = None, dictionary_enabled: bool | None = None, compression: str | None = None, statistics_enabled: str | None = None, bloom_filter_enabled: bool | None = None, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None)
+

Parquet options for individual columns.

+

Contains the available options that can be applied for an individual Parquet column, +replacing the global options in ParquetWriterOptions.

+

Initialize the ParquetColumnOptions.

+
+
Parameters:
+
    +
  • encoding – Sets encoding for the column path. Valid values are: plain, +plain_dictionary, rle, bit_packed, delta_binary_packed, +delta_length_byte_array, delta_byte_array, rle_dictionary, +and byte_stream_split. These values are not case-sensitive. If +None, uses the default parquet options.

  • +
  • dictionary_enabled – Sets if dictionary encoding is enabled for the column +path. If None, uses the default parquet options.

  • +
  • compression – Sets default parquet compression codec for the column path. +Valid values are uncompressed, snappy, gzip(level), lzo, +brotli(level), lz4, zstd(level), and lz4_raw. These +values are not case-sensitive. If None, uses the default parquet +options.

  • +
  • statistics_enabled – Sets if statistics are enabled for the column. Valid +values are: none, chunk, and page. These values are not case +sensitive. If None, uses the default parquet options.

  • +
  • bloom_filter_enabled – Sets if bloom filter is enabled for the column path. +If None, uses the default parquet options.

  • +
  • bloom_filter_fpp – Sets bloom filter false positive probability for the +column path. If None, uses the default parquet options.

  • +
  • bloom_filter_ndv – Sets bloom filter number of distinct values. If None, +uses the default parquet options.

  • +
+
+
+
+
+bloom_filter_enabled = None
+
+ +
+
+bloom_filter_fpp = None
+
+ +
+
+bloom_filter_ndv = None
+
+ +
+
+compression = None
+
+ +
+
+dictionary_enabled = None
+
+ +
+
+encoding = None
+
+ +
+
+statistics_enabled = None
+
+ +
+ +
+
+class datafusion.dataframe.ParquetWriterOptions(data_pagesize_limit: int = 1024 * 1024, write_batch_size: int = 1024, writer_version: str = '1.0', skip_arrow_metadata: bool = False, compression: str | None = 'zstd(3)', compression_level: int | None = None, dictionary_enabled: bool | None = True, dictionary_page_size_limit: int = 1024 * 1024, statistics_enabled: str | None = 'page', max_row_group_size: int = 1024 * 1024, created_by: str = 'datafusion-python', column_index_truncate_length: int | None = 64, statistics_truncate_length: int | None = None, data_page_row_count_limit: int = 20000, encoding: str | None = None, bloom_filter_on_write: bool = False, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None, allow_single_file_parallelism: bool = True, maximum_parallel_row_group_writers: int = 1, maximum_buffered_record_batches_per_stream: int = 2, column_specific_options: dict[str, ParquetColumnOptions] | None = None)
+

Advanced parquet writer options.

+

Allows setting the writer options that apply to the entire file. Some options can +also be set on a column by column basis, with the field column_specific_options +(see ParquetColumnOptions).

+

Initialize the ParquetWriterOptions.

+
+
Parameters:
+
    +
  • data_pagesize_limit – Sets best effort maximum size of data page in bytes.

  • +
  • write_batch_size – Sets write_batch_size in bytes.

  • +
  • writer_version – Sets parquet writer version. Valid values are 1.0 and +2.0.

  • +
  • skip_arrow_metadata – Skip encoding the embedded arrow metadata in the +KV_meta.

  • +
  • compression

    Compression type to use. Default is zstd(3). +Available compression types are:

    +
      +
    • uncompressed: No compression.

    • +
    • snappy: Snappy compression.

    • +
    • gzip(n): Gzip compression with level n.

    • +
    • brotli(n): Brotli compression with level n.

    • +
    • lz4: LZ4 compression.

    • +
    • lz4_raw: LZ4_RAW compression.

    • +
    • zstd(n): Zstandard compression with level n.

    • +
    +

  • +
  • compression_level – Compression level to set.

  • +
  • dictionary_enabled – Sets if dictionary encoding is enabled. If None, +uses the default parquet writer setting.

  • +
  • dictionary_page_size_limit – Sets best effort maximum dictionary page size, +in bytes.

  • +
  • statistics_enabled – Sets if statistics are enabled for any column. Valid +values are none, chunk, and page. If None, uses the +default parquet writer setting.

  • +
  • max_row_group_size – Target maximum number of rows in each row group +(defaults to 1M rows). Writing larger row groups requires more memory +to write, but can get better compression and be faster to read.

  • +
  • created_by – Sets “created by” property.

  • +
  • column_index_truncate_length – Sets column index truncate length.

  • +
  • statistics_truncate_length – Sets statistics truncate length. If None, +uses the default parquet writer setting.

  • +
  • data_page_row_count_limit – Sets best effort maximum number of rows in a data +page.

  • +
  • encoding – Sets default encoding for any column. Valid values are plain, +plain_dictionary, rle, bit_packed, delta_binary_packed, +delta_length_byte_array, delta_byte_array, rle_dictionary, +and byte_stream_split. If None, uses the default parquet writer +setting.

  • +
  • bloom_filter_on_write – Write bloom filters for all columns when creating +parquet files.

  • +
  • bloom_filter_fpp – Sets bloom filter false positive probability. If None, +uses the default parquet writer setting.

  • +
  • bloom_filter_ndv – Sets bloom filter number of distinct values. If None, +uses the default parquet writer setting.

  • +
  • allow_single_file_parallelism – Controls whether DataFusion will attempt to +speed up writing parquet files by serializing them in parallel. Each +column in each row group in each output file is serialized in parallel +leveraging a maximum possible core count of +n_files * n_row_groups * n_columns.

  • +
  • maximum_parallel_row_group_writers – By default parallel parquet writer is +tuned for minimum memory usage in a streaming execution plan. You may +see a performance benefit when writing large parquet files by increasing +maximum_parallel_row_group_writers and +maximum_buffered_record_batches_per_stream if your system has idle +cores and can tolerate additional memory usage. Boosting these values is +likely worthwhile when writing out already in-memory data, such as from +a cached data frame.

  • +
  • maximum_buffered_record_batches_per_stream – See +maximum_parallel_row_group_writers.

  • +
  • column_specific_options – Overrides options for specific columns. If a column +is not a part of this dictionary, it will use the file-level parameters provided +in this ParquetWriterOptions.

  • +
+
+
+
+
+allow_single_file_parallelism = True
+
+ +
+
+bloom_filter_fpp = None
+
+ +
+
+bloom_filter_ndv = None
+
+ +
+
+bloom_filter_on_write = False
+
+ +
+
+column_index_truncate_length = 64
+
+ +
+
+column_specific_options = None
+
+ +
+
+created_by = 'datafusion-python'
+
+ +
+
+data_page_row_count_limit = 20000
+
+ +
+
+data_pagesize_limit = 1048576
+
+ +
+
+dictionary_enabled = True
+
+ +
+
+dictionary_page_size_limit = 1048576
+
+ +
+
+encoding = None
+
+ +
+
+max_row_group_size = 1048576
+
+ +
+
+maximum_buffered_record_batches_per_stream = 2
+
+ +
+
+maximum_parallel_row_group_writers = 1
+
+ +
+
+skip_arrow_metadata = False
+
+ +
+
+statistics_enabled = 'page'
+
+ +
+
+statistics_truncate_length = None
+
+ +
+
+write_batch_size = 1024
+
+ +
+
+writer_version = '1.0'
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/dataframe_formatter/index.html b/autoapi/datafusion/dataframe_formatter/index.html new file mode 100644 index 000000000..f611c472a --- /dev/null +++ b/autoapi/datafusion/dataframe_formatter/index.html @@ -0,0 +1,1639 @@ + + + + + + + + datafusion.dataframe_formatter — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.dataframe_formatter

+

HTML formatting utilities for DataFusion DataFrames.

+
+

Classes

+ + + + + + + + + + + + + + + + + + +

CellFormatter

Protocol for cell value formatters.

DataFrameHtmlFormatter

Configurable HTML formatter for DataFusion DataFrames.

DefaultStyleProvider

Default implementation of StyleProvider.

FormatterManager

Manager class for the global DataFrame HTML formatter instance.

StyleProvider

Protocol for HTML style providers.

+
+
+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

_refresh_formatter_reference(→ None)

Refresh formatter reference in any modules using it.

_validate_bool(→ None)

Validate that a parameter is a boolean.

_validate_formatter_parameters(→ int)

Validate all formatter parameters and return resolved max_rows value.

_validate_positive_int(→ None)

Validate that a parameter is a positive integer.

configure_formatter(→ None)

Configure the global DataFrame HTML formatter.

get_formatter(→ DataFrameHtmlFormatter)

Get the current global DataFrame HTML formatter.

reset_formatter(→ None)

Reset the global DataFrame HTML formatter to default settings.

set_formatter(→ None)

Set the global DataFrame HTML formatter.

+
+
+

Module Contents

+
+
+class datafusion.dataframe_formatter.CellFormatter
+

Bases: Protocol

+

Protocol for cell value formatters.

+
+
+__call__(value: Any) str
+

Format a cell value to string representation.

+
+ +
+ +
+
+class datafusion.dataframe_formatter.DataFrameHtmlFormatter(max_cell_length: int = 25, max_width: int = 1000, max_height: int = 300, max_memory_bytes: int = 2 * 1024 * 1024, min_rows: int = 10, max_rows: int | None = None, repr_rows: int | None = None, enable_cell_expansion: bool = True, custom_css: str | None = None, show_truncation_message: bool = True, style_provider: StyleProvider | None = None, use_shared_styles: bool = True)
+

Configurable HTML formatter for DataFusion DataFrames.

+

This class handles the HTML rendering of DataFrames for display in +Jupyter notebooks and other rich display contexts.

+

This class supports extension through composition. Key extension points: +- Provide a custom StyleProvider for styling cells and headers +- Register custom formatters for specific types +- Provide custom cell builders for specialized cell rendering

+
+
Parameters:
+
    +
  • max_cell_length – Maximum characters to display in a cell before truncation

  • +
  • max_width – Maximum width of the HTML table in pixels

  • +
  • max_height – Maximum height of the HTML table in pixels

  • +
  • max_memory_bytes – Maximum memory in bytes for rendered data (default: 2MB)

  • +
  • min_rows – Minimum number of rows to display (must be <= max_rows)

  • +
  • max_rows – Maximum number of rows to display in repr output

  • +
  • repr_rows – Deprecated alias for max_rows

  • +
  • enable_cell_expansion – Whether to add expand/collapse buttons for long cell +values

  • +
  • custom_css – Additional CSS to include in the HTML output

  • +
  • show_truncation_message – Whether to display a message when data is truncated

  • +
  • style_provider – Custom provider for cell and header styles

  • +
  • use_shared_styles – Whether to load styles and scripts only once per notebook +session

  • +
+
+
+

Initialize the HTML formatter.

+
+
Parameters:
+
    +
  • max_cell_length – Maximum length of cell content before truncation.

  • +
  • max_width – Maximum width of the displayed table in pixels.

  • +
  • max_height – Maximum height of the displayed table in pixels.

  • +
  • max_memory_bytes – Maximum memory in bytes for rendered data. Helps prevent performance +issues with large datasets.

  • +
  • min_rows – Minimum number of rows to display even if memory limit is reached. +Must not exceed max_rows.

  • +
  • max_rows – Maximum number of rows to display. Takes precedence over memory limits +when fewer rows are requested.

  • +
  • repr_rows – Deprecated alias for max_rows. Use max_rows instead.

  • +
  • enable_cell_expansion – Whether to allow cells to expand when clicked.

  • +
  • custom_css – Custom CSS to apply to the HTML table.

  • +
  • show_truncation_message – Whether to show a message indicating that content has been truncated.

  • +
  • style_provider – Provider of CSS styles for the HTML table. If None, DefaultStyleProvider +is used.

  • +
  • use_shared_styles – Whether to use shared styles across multiple tables. This improves +performance when displaying many DataFrames in a single notebook.

  • +
  • Raises:

  • +
  • ValueError – If max_cell_length, max_width, max_height, max_memory_bytes, +min_rows or max_rows is not a positive integer, or if min_rows +exceeds max_rows.

  • +
  • TypeError – If enable_cell_expansion, show_truncation_message, or use_shared_styles is +not a boolean, or if custom_css is provided but is not a string, or if +style_provider is provided but does not implement the StyleProvider +protocol.

  • +
+
+
+
+
+_build_expandable_cell(formatted_value: str, row_count: int, col_idx: int, table_uuid: str) str
+

Build an expandable cell for long content.

+
+ +
+ +

Build the HTML footer with JavaScript and messages.

+
+ +
+
+_build_html_header() list[str]
+

Build the HTML header with CSS styles.

+
+ +
+
+_build_regular_cell(formatted_value: str) str
+

Build a regular table cell.

+
+ +
+
+_build_table_body(batches: list, table_uuid: str) list[str]
+

Build the HTML table body with data rows.

+
+ +
+
+_build_table_container_start() list[str]
+

Build the opening tags for the table container.

+
+ +
+
+_build_table_header(schema: Any) list[str]
+

Build the HTML table header with column names.

+
+ +
+
+_format_cell_value(value: Any) str
+

Format a cell value for display.

+

Uses registered type formatters if available.

+
+
Parameters:
+

value – The cell value to format

+
+
Returns:
+

Formatted cell value as string

+
+
+
+ +
+
+_get_cell_value(column: Any, row_idx: int) Any
+

Extract a cell value from a column.

+
+
Parameters:
+
    +
  • column – Arrow array

  • +
  • row_idx – Row index

  • +
+
+
Returns:
+

The raw cell value

+
+
+
+ +
+
+_get_default_css() str
+

Get default CSS styles for the HTML table.

+
+ +
+
+_get_javascript() str
+

Get JavaScript code for interactive elements.

+
+ +
+
+format_html(batches: list, schema: Any, has_more: bool = False, table_uuid: str | None = None) str
+

Format record batches as HTML.

+

This method is used by DataFrame’s _repr_html_ implementation and can be +called directly when custom HTML rendering is needed.

+
+
Parameters:
+
    +
  • batches – List of Arrow RecordBatch objects

  • +
  • schema – Arrow Schema object

  • +
  • has_more – Whether there are more batches not shown

  • +
  • table_uuid – Unique ID for the table, used for JavaScript interactions

  • +
+
+
Returns:
+

HTML string representation of the data

+
+
Raises:
+

TypeError – If schema is invalid and no batches are provided

+
+
+
+ +
+
+format_str(batches: list, schema: Any, has_more: bool = False, table_uuid: str | None = None) str
+

Format record batches as a string.

+

This method is used by DataFrame’s __repr__ implementation and can be +called directly when string rendering is needed.

+
+
Parameters:
+
    +
  • batches – List of Arrow RecordBatch objects

  • +
  • schema – Arrow Schema object

  • +
  • has_more – Whether there are more batches not shown

  • +
  • table_uuid – Unique ID for the table, used for JavaScript interactions

  • +
+
+
Returns:
+

String representation of the data

+
+
Raises:
+

TypeError – If schema is invalid and no batches are provided

+
+
+
+ +
+
+register_formatter(type_class: type, formatter: CellFormatter) None
+

Register a custom formatter for a specific data type.

+
+
Parameters:
+
    +
  • type_class – The type to register a formatter for

  • +
  • formatter – Function that takes a value of the given type and returns +a formatted string

  • +
+
+
+
+ +
+
+set_custom_cell_builder(builder: collections.abc.Callable[[Any, int, int, str], str]) None
+

Set a custom cell builder function.

+
+
Parameters:
+

builder – Function that takes (value, row, col, table_id) and returns HTML

+
+
+
+ +
+
+set_custom_header_builder(builder: collections.abc.Callable[[Any], str]) None
+

Set a custom header builder function.

+
+
Parameters:
+

builder – Function that takes a field and returns HTML

+
+
+
+ +
+
+_custom_cell_builder: collections.abc.Callable[[Any, int, int, str], str] | None = None
+
+ +
+
+_custom_header_builder: collections.abc.Callable[[Any], str] | None = None
+
+ +
+
+_max_rows = None
+
+ +
+
+_type_formatters: dict[type, CellFormatter]
+
+ +
+
+custom_css = None
+
+ +
+
+enable_cell_expansion = True
+
+ +
+
+max_cell_length = 25
+
+ +
+
+max_height = 300
+
+ +
+
+max_memory_bytes = 2097152
+
+ +
+
+property max_rows: int
+

Get the maximum number of rows to display.

+
+
Returns:
+

The maximum number of rows to display in repr output

+
+
+
+ +
+
+max_width = 1000
+
+ +
+
+min_rows = 10
+
+ +
+
+property repr_rows: int
+

Get the maximum number of rows (deprecated name).

+
+

Deprecated: Use max_rows instead. This property is provided for +backward compatibility.

+
+
+
Returns:
+

The maximum number of rows to display

+
+
+
+ +
+
+show_truncation_message = True
+
+ +
+
+style_provider
+
+ +
+
+use_shared_styles = True
+
+ +
+ +
+
+class datafusion.dataframe_formatter.DefaultStyleProvider
+

Default implementation of StyleProvider.

+
+
+get_cell_style() str
+

Get the CSS style for table cells.

+
+
Returns:
+

CSS style string

+
+
+
+ +
+
+get_header_style() str
+

Get the CSS style for header cells.

+
+
Returns:
+

CSS style string

+
+
+
+ +
+ +
+
+class datafusion.dataframe_formatter.FormatterManager
+

Manager class for the global DataFrame HTML formatter instance.

+
+
+classmethod get_formatter() DataFrameHtmlFormatter
+

Get the current global DataFrame HTML formatter.

+
+
Returns:
+

The global HTML formatter instance

+
+
+
+ +
+
+classmethod set_formatter(formatter: DataFrameHtmlFormatter) None
+

Set the global DataFrame HTML formatter.

+
+
Parameters:
+

formatter – The formatter instance to use globally

+
+
+
+ +
+
+_default_formatter: DataFrameHtmlFormatter
+
+ +
+ +
+
+class datafusion.dataframe_formatter.StyleProvider
+

Bases: Protocol

+

Protocol for HTML style providers.

+
+
+get_cell_style() str
+

Get the CSS style for table cells.

+
+ +
+
+get_header_style() str
+

Get the CSS style for header cells.

+
+ +
+ +
+
+datafusion.dataframe_formatter._refresh_formatter_reference() None
+

Refresh formatter reference in any modules using it.

+

This helps ensure that changes to the formatter are reflected in existing +DataFrames that might be caching the formatter reference.

+
+ +
+
+datafusion.dataframe_formatter._validate_bool(value: Any, param_name: str) None
+

Validate that a parameter is a boolean.

+
+
Parameters:
+
    +
  • value – The value to validate

  • +
  • param_name – Name of the parameter (used in error message)

  • +
+
+
Raises:
+

TypeError – If the value is not a boolean

+
+
+
+ +
+
+datafusion.dataframe_formatter._validate_formatter_parameters(max_cell_length: int, max_width: int, max_height: int, max_memory_bytes: int, min_rows: int, max_rows: int | None, repr_rows: int | None, enable_cell_expansion: bool, show_truncation_message: bool, use_shared_styles: bool, custom_css: str | None, style_provider: Any) int
+

Validate all formatter parameters and return resolved max_rows value.

+
+
Parameters:
+
    +
  • max_cell_length – Maximum cell length value to validate

  • +
  • max_width – Maximum width value to validate

  • +
  • max_height – Maximum height value to validate

  • +
  • max_memory_bytes – Maximum memory bytes value to validate

  • +
  • min_rows – Minimum rows to display value to validate

  • +
  • max_rows – Maximum rows value to validate (None means use default)

  • +
  • repr_rows – Deprecated repr_rows value to validate

  • +
  • enable_cell_expansion – Boolean expansion flag to validate

  • +
  • show_truncation_message – Boolean message flag to validate

  • +
  • use_shared_styles – Boolean styles flag to validate

  • +
  • custom_css – Custom CSS string to validate

  • +
  • style_provider – Style provider object to validate

  • +
+
+
Returns:
+

The resolved max_rows value after handling repr_rows deprecation

+
+
Raises:
+
    +
  • ValueError – If any numeric parameter is invalid or constraints are violated

  • +
  • TypeError – If any parameter has invalid type

  • +
  • DeprecationWarning – If repr_rows parameter is used

  • +
+
+
+
+ +
+
+datafusion.dataframe_formatter._validate_positive_int(value: Any, param_name: str) None
+

Validate that a parameter is a positive integer.

+
+
Parameters:
+
    +
  • value – The value to validate

  • +
  • param_name – Name of the parameter (used in error message)

  • +
+
+
Raises:
+

ValueError – If the value is not a positive integer

+
+
+
+ +
+
+datafusion.dataframe_formatter.configure_formatter(**kwargs: Any) None
+

Configure the global DataFrame HTML formatter.

+

This function creates a new formatter with the provided configuration +and sets it as the global formatter for all DataFrames.

+
+
Parameters:
+

**kwargs – Formatter configuration parameters like max_cell_length, +max_width, max_height, enable_cell_expansion, etc.

+
+
Raises:
+

ValueError – If any invalid parameters are provided

+
+
+

Example

+
>>> from datafusion.dataframe_formatter import configure_formatter
+>>> configure_formatter(
+...     max_cell_length=50,
+...     max_height=500,
+...     enable_cell_expansion=True,
+...     use_shared_styles=True
+... )
+
+
+
+ +
+
+datafusion.dataframe_formatter.get_formatter() DataFrameHtmlFormatter
+

Get the current global DataFrame HTML formatter.

+

This function is used by the DataFrame._repr_html_ implementation to access +the shared formatter instance. It can also be used directly when custom +HTML rendering is needed.

+
+
Returns:
+

The global HTML formatter instance

+
+
+

Example

+
>>> from datafusion.dataframe_formatter import get_formatter
+>>> formatter = get_formatter()
+>>> formatter.max_cell_length = 50  # Increase cell length
+
+
+
+ +
+
+datafusion.dataframe_formatter.reset_formatter() None
+

Reset the global DataFrame HTML formatter to default settings.

+

This function creates a new formatter with default configuration +and sets it as the global formatter for all DataFrames.

+

Example

+
>>> from datafusion.dataframe_formatter import reset_formatter
+>>> reset_formatter()  # Reset formatter to default settings
+
+
+
+ +
+
+datafusion.dataframe_formatter.set_formatter(formatter: DataFrameHtmlFormatter) None
+

Set the global DataFrame HTML formatter.

+
+
Parameters:
+

formatter – The formatter instance to use globally

+
+
+

Example

+
>>> from datafusion.dataframe_formatter import DataFrameHtmlFormatter, set_formatter
+>>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100)
+>>> set_formatter(custom_formatter)
+
+
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/expr/index.html b/autoapi/datafusion/expr/index.html new file mode 100644 index 000000000..44cae0131 --- /dev/null +++ b/autoapi/datafusion/expr/index.html @@ -0,0 +1,4439 @@ + + + + + + + + datafusion.expr — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.expr

+

This module supports expressions, one of the core concepts in DataFusion.

+

See Expressions in the online documentation for more details.

+
+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Aggregate

AggregateFunction

Alias

Analyze

Between

BinaryExpr

Case

Cast

Column

CopyTo

CreateCatalog

CreateCatalogSchema

CreateExternalTable

CreateFunction

CreateFunctionBody

CreateIndex

CreateMemoryTable

CreateView

Deallocate

DescribeTable

Distinct

DmlStatement

DropCatalogSchema

DropFunction

DropTable

DropView

EXPR_TYPE_ERROR

EmptyRelation

Execute

Exists

Explain

Extension

FileType

Filter

GroupingSet

ILike

InList

InSubquery

IsFalse

IsNotFalse

IsNotNull

IsNotTrue

IsNotUnknown

IsNull

IsTrue

IsUnknown

Join

JoinConstraint

JoinType

Like

Limit

Literal

Negative

Not

OperateFunctionArg

Partitioning

Placeholder

Prepare

Projection

RecursiveQuery

Repartition

ScalarSubquery

ScalarVariable

SetVariable

SimilarTo

Sort

SortKey

Subquery

SubqueryAlias

TableScan

TransactionAccessMode

TransactionConclusion

TransactionEnd

TransactionIsolationLevel

TransactionStart

TryCast

Union

Unnest

UnnestExpr

Values

WindowExpr

+
+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + +

CaseBuilder

Builder class for constructing case statements.

Expr

Expression object.

SortExpr

Used to specify sorting on either a DataFrame or function.

Window

Define reusable window parameters.

WindowFrame

Defines a window frame for performing window operations.

WindowFrameBound

Defines a single window frame bound.

+
+
+

Functions

+ + + + + + + + + +

ensure_expr(→ datafusion._internal.expr.Expr)

Return the internal expression from Expr or raise TypeError.

ensure_expr_list(→ list[datafusion._internal.expr.Expr])

Flatten an iterable of expressions, validating each via ensure_expr.

+
+
+

Module Contents

+
+
+class datafusion.expr.CaseBuilder(case_builder: datafusion._internal.expr.CaseBuilder)
+

Builder class for constructing case statements.

+

An example usage would be as follows:

+
import datafusion.functions as f
+from datafusion import lit, col
+df.select(
+    f.case(col("column_a"))
+    .when(lit(1), lit("One"))
+    .when(lit(2), lit("Two"))
+    .otherwise(lit("Unknown"))
+)
+
+
+

Constructs a case builder.

+

This is not typically called by the end user directly. See +datafusion.functions.case() instead.

+
+
+end() Expr
+

Finish building a case statement.

+

Any non-matching cases will end in a null value.

+
+ +
+
+otherwise(else_expr: Expr) Expr
+

Set a default value for the case statement.

+
+ +
+
+when(when_expr: Expr, then_expr: Expr) CaseBuilder
+

Add a case to match against.

+
+ +
+
+case_builder
+
+ +
+ +
+
+class datafusion.expr.Expr(expr: datafusion._internal.expr.RawExpr)
+

Expression object.

+

Expressions are one of the core concepts in DataFusion. See +Expressions in the online documentation for more information.

+

This constructor should not be called by the end user.

+
+
+__add__(rhs: Any) Expr
+

Addition operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__and__(rhs: Expr) Expr
+

Logical AND.

+
+ +
+
+__eq__(rhs: object) Expr
+

Equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__ge__(rhs: Any) Expr
+

Greater than or equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__getitem__(key: str | int) Expr
+

Retrieve sub-object.

+

If key is a string, returns the subfield of the struct. +If key is an integer, retrieves the element in the array. Note that the +element index begins at 0, unlike +array_element() which begins at 1. +If key is a slice, returns an array that contains a slice of the +original array. Similar to integer indexing, this follows Python convention +where the index begins at 0 unlike +array_slice() which begins at 1.

+
+ +
+
+__gt__(rhs: Any) Expr
+

Greater than.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__invert__() Expr
+

Binary not (~).

+
+ +
+
+__le__(rhs: Any) Expr
+

Less than or equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__lt__(rhs: Any) Expr
+

Less than.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__mod__(rhs: Any) Expr
+

Modulo operator (%).

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__mul__(rhs: Any) Expr
+

Multiplication operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__ne__(rhs: object) Expr
+

Not equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__or__(rhs: Expr) Expr
+

Logical OR.

+
+ +
+
+__repr__() str
+

Generate a string representation of this expression.

+
+ +
+
+__richcmp__(other: Expr, op: int) Expr
+

Comparison operator.

+
+ +
+
+__sub__(rhs: Any) Expr
+

Subtraction operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__truediv__(rhs: Any) Expr
+

Division operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+abs() Expr
+

Return the absolute value of a given number.

+
+

Returns:

+
+
Expr

A new expression representing the absolute value of the input expression.

+
+
+
+
+ +
+
+acos() Expr
+

Returns the arc cosine or inverse cosine of a number.

+
+

Returns:

+
+
Expr

A new expression representing the arc cosine of the input expression.

+
+
+
+
+ +
+
+acosh() Expr
+

Returns inverse hyperbolic cosine.

+
+ +
+
+alias(name: str, metadata: dict[str, str] | None = None) Expr
+

Assign a name to the expression.

+
+
Parameters:
+
    +
  • name – The name to assign to the expression.

  • +
  • metadata – Optional metadata to attach to the expression.

  • +
+
+
Returns:
+

A new expression with the assigned name.

+
+
+
+ +
+
+array_dims() Expr
+

Returns an array of the array’s dimensions.

+
+ +
+
+array_distinct() Expr
+

Returns distinct values from the array after removing duplicates.

+
+ +
+
+array_empty() Expr
+

Returns a boolean indicating whether the array is empty.

+
+ +
+
+array_length() Expr
+

Returns the length of the array.

+
+ +
+
+array_ndims() Expr
+

Returns the number of dimensions of the array.

+
+ +
+
+array_pop_back() Expr
+

Returns the array without the last element.

+
+ +
+
+array_pop_front() Expr
+

Returns the array without the first element.

+
+ +
+
+arrow_typeof() Expr
+

Returns the Arrow type of the expression.

+
+ +
+
+ascii() Expr
+

Returns the numeric code of the first character of the argument.

+
+ +
+
+asin() Expr
+

Returns the arc sine or inverse sine of a number.

+
+ +
+
+asinh() Expr
+

Returns inverse hyperbolic sine.

+
+ +
+
+atan() Expr
+

Returns inverse tangent of a number.

+
+ +
+
+atanh() Expr
+

Returns inverse hyperbolic tangent.

+
+ +
+
+between(low: Any, high: Any, negated: bool = False) Expr
+

Returns True if this expression is between a given range.

+
+
Parameters:
+
    +
  • low – lower bound of the range (inclusive).

  • +
  • high – higher bound of the range (inclusive).

  • +
  • negated – if True, negates the check, i.e. tests that the expression is outside the given range.

  • +
+
+
+
+ +
+
+bit_length() Expr
+

Returns the number of bits in the string argument.

+
+ +
+
+btrim() Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+canonical_name() str
+

Returns a complete string representation of this expression.

+
+ +
+
+cardinality() Expr
+

Returns the total number of elements in the array.

+
+ +
+
+cast(to: pyarrow.DataType[Any] | type) Expr
+

Cast to a new data type.

+
+ +
+
+cbrt() Expr
+

Returns the cube root of a number.

+
+ +
+
+ceil() Expr
+

Returns the nearest integer greater than or equal to the argument.

+
+ +
+
+char_length() Expr
+

The number of characters in the string.

+
+ +
+
+character_length() Expr
+

Returns the number of characters in the argument.

+
+ +
+
+chr() Expr
+

Converts the Unicode code point to a UTF8 character.

+
+ +
+
+static column(value: str) Expr
+

Creates a new expression representing a column.

+
+ +
+
+column_name(plan: datafusion.plan.LogicalPlan) str
+

Compute the output column name based on the provided logical plan.

+
+ +
+
+cos() Expr
+

Returns the cosine of the argument.

+
+ +
+
+cosh() Expr
+

Returns the hyperbolic cosine of the argument.

+
+ +
+
+cot() Expr
+

Returns the cotangent of the argument.

+
+ +
+
+degrees() Expr
+

Converts the argument from radians to degrees.

+
+ +
+
+display_name() str
+

Returns the name of this expression as it should appear in a schema.

+

This name will not include any CAST expressions.

+
+ +
+
+distinct() ExprFuncBuilder
+

Only evaluate distinct values for an aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+empty() Expr
+

This is an alias for array_empty().

+
+ +
+
+exp() Expr
+

Returns the exponential of the argument.

+
+ +
+
+factorial() Expr
+

Returns the factorial of the argument.

+
+ +
+
+fill_nan(value: Any | Expr | None = None) Expr
+

Fill NaN values with a provided value.

+
+ +
+
+fill_null(value: Any | Expr | None = None) Expr
+

Fill NULL values with a provided value.

+
+ +
+
+filter(filter: Expr) ExprFuncBuilder
+

Filter an aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+flatten() Expr
+

Flattens an array of arrays into a single array.

+
+ +
+
+floor() Expr
+

Returns the nearest integer less than or equal to the argument.

+
+ +
+
+from_unixtime() Expr
+

Converts an integer to RFC3339 timestamp format string.

+
+ +
+
+initcap() Expr
+

Set the initial letter of each word to capital.

+

Converts the first letter of each word in string to uppercase and the +remaining characters to lowercase.

+
+ +
+
+is_not_null() Expr
+

Returns True if this expression is not null.

+
+ +
+
+is_null() Expr
+

Returns True if this expression is null.

+
+ +
+
+isnan() Expr
+

Returns true if a given number is +NaN or -NaN otherwise returns false.

+
+ +
+
+iszero() Expr
+

Returns true if a given number is +0.0 or -0.0 otherwise returns false.

+
+ +
+
+length() Expr
+

The number of characters in the string.

+
+ +
+
+list_dims() Expr
+

Returns an array of the array’s dimensions.

+

This is an alias for array_dims().

+
+ +
+
+list_distinct() Expr
+

Returns distinct values from the array after removing duplicates.

+

This is an alias for array_distinct().

+
+ +
+
+list_length() Expr
+

Returns the length of the array.

+

This is an alias for array_length().

+
+ +
+
+list_ndims() Expr
+

Returns the number of dimensions of the array.

+

This is an alias for array_ndims().

+
+ +
+
+static literal(value: Any) Expr
+

Creates a new expression representing a scalar value.

+

value must be a valid PyArrow scalar value or easily castable to one.

+
+ +
+
+static literal_with_metadata(value: Any, metadata: dict[str, str]) Expr
+

Creates a new expression representing a scalar value with metadata.

+
+
Parameters:
+
    +
  • value – A valid PyArrow scalar value or easily castable to one.

  • +
  • metadata – Metadata to attach to the expression.

  • +
+
+
+
+ +
+
+ln() Expr
+

Returns the natural logarithm (base e) of the argument.

+
+ +
+
+log10() Expr
+

Base 10 logarithm of the argument.

+
+ +
+
+log2() Expr
+

Base 2 logarithm of the argument.

+
+ +
+
+lower() Expr
+

Converts a string to lowercase.

+
+ +
+
+ltrim() Expr
+

Removes all characters, spaces by default, from the beginning of a string.

+
+ +
+
+md5() Expr
+

Computes an MD5 128-bit checksum for a string expression.

+
+ +
+
+null_treatment(null_treatment: datafusion.common.NullTreatment) ExprFuncBuilder
+

Set the treatment for null values for a window or aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+octet_length() Expr
+

Returns the number of bytes of a string.

+
+ +
+
+order_by(*exprs: Expr | SortExpr) ExprFuncBuilder
+

Set the ordering for a window or aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+over(window: Window) Expr
+

Turn an aggregate function into a window function.

+

This function turns any aggregate function into a window function. With the +exception of partition_by, how each of the parameters is used is determined +by the underlying aggregate function.

+
+
Parameters:
+

window – Window definition

+
+
+
+ +
+
+partition_by(*partition_by: Expr) ExprFuncBuilder
+

Set the partitioning for a window function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+python_value() Any
+

Extracts the Expr value into Any.

+

This is only valid for literal expressions.

+
+
Returns:
+

Python object representing literal value of the expression.

+
+
+
+ +
+
+radians() Expr
+

Converts the argument from degrees to radians.

+
+ +
+
+reverse() Expr
+

Reverse the string argument.

+
+ +
+
+rex_call_operands() list[Expr]
+

Return the operands of the expression based on its variant type.

+

Row expressions, Rex(s), operate on the concept of operands. Different +variants of Expressions, Expr(s), store those operands in different +data structures. This function examines the Expr variant and returns +the operands to the calling logic.

+
+ +
+
+rex_call_operator() str
+

Extracts the operator associated with a row expression type call.

+
+ +
+
+rex_type() datafusion.common.RexType
+

Return the Rex Type of this expression.

+

A Rex (Row Expression) specifies a single row of data. That specification +could include user defined functions or types. RexType identifies the +row as one of the possible valid RexType.

+
+ +
+
+rtrim() Expr
+

Removes all characters, spaces by default, from the end of a string.

+
+ +
+
+schema_name() str
+

Returns the name of this expression as it should appear in a schema.

+

This name will not include any CAST expressions.

+
+ +
+
+sha224() Expr
+

Computes the SHA-224 hash of a binary string.

+
+ +
+
+sha256() Expr
+

Computes the SHA-256 hash of a binary string.

+
+ +
+
+sha384() Expr
+

Computes the SHA-384 hash of a binary string.

+
+ +
+
+sha512() Expr
+

Computes the SHA-512 hash of a binary string.

+
+ +
+
+signum() Expr
+

Returns the sign of the argument (-1, 0, +1).

+
+ +
+
+sin() Expr
+

Returns the sine of the argument.

+
+ +
+
+sinh() Expr
+

Returns the hyperbolic sine of the argument.

+
+ +
+
+sort(ascending: bool = True, nulls_first: bool = True) SortExpr
+

Creates a sort Expr from an existing Expr.

+
+
Parameters:
+
    +
  • ascending – If true, sort in ascending order.

  • +
  • nulls_first – Return null values first.

  • +
+
+
+
+ +
+
+sqrt() Expr
+

Returns the square root of the argument.

+
+ +
+
+static string_literal(value: str) Expr
+

Creates a new expression representing a UTF8 literal value.

+

It is different from literal because it is pa.string() instead of +pa.string_view()

+

This is needed for cases where DataFusion is expecting a UTF8 instead of +UTF8View literal, like in: +https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179

+
+ +
+
+tan() Expr
+

Returns the tangent of the argument.

+
+ +
+
+tanh() Expr
+

Returns the hyperbolic tangent of the argument.

+
+ +
+
+to_hex() Expr
+

Converts an integer to a hexadecimal string.

+
+ +
+
+to_variant() Any
+

Convert this expression into a python object if possible.

+
+ +
+
+trim() Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+types() datafusion.common.DataTypeMap
+

Return the DataTypeMap.

+
+
Returns:
+

DataTypeMap which represents the PythonType, Arrow DataType, and +SqlType Enum which this expression represents.

+
+
+
+ +
+
+upper() Expr
+

Converts a string to uppercase.

+
+ +
+
+variant_name() str
+

Returns the name of the Expr variant.

+

Ex: IsNotNull, Literal, BinaryExpr, etc

+
+ +
+
+window_frame(window_frame: WindowFrame) ExprFuncBuilder
+

Set the frame for a window function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+__radd__
+
+ +
+
+__rand__
+
+ +
+
+__rmod__
+
+ +
+
+__rmul__
+
+ +
+
+__ror__
+
+ +
+
+__rsub__
+
+ +
+
+__rtruediv__
+
+ +
+
+_to_pyarrow_types: ClassVar[dict[type, pyarrow.DataType]]
+
+ +
+
+expr
+
+ +
+ +
+
+class datafusion.expr.SortExpr(expr: Expr, ascending: bool, nulls_first: bool)
+

Used to specify sorting on either a DataFrame or function.

+

This constructor should not be called by the end user.

+
+
+__repr__() str
+

Generate a string representation of this expression.

+
+ +
+
+ascending() bool
+

Return ascending property.

+
+ +
+
+expr() Expr
+

Return the raw expr backing the SortExpr.

+
+ +
+
+nulls_first() bool
+

Return nulls_first property.

+
+ +
+
+raw_sort
+
+ +
+ +
+
+class datafusion.expr.Window(partition_by: list[Expr] | Expr | None = None, window_frame: WindowFrame | None = None, order_by: list[SortExpr | Expr | str] | Expr | SortExpr | str | None = None, null_treatment: datafusion.common.NullTreatment | None = None)
+

Define reusable window parameters.

+

Construct a window definition.

+
+
Parameters:
+
    +
  • partition_by – Partitions for window operation

  • +
  • window_frame – Define the start and end bounds of the window frame

  • +
  • order_by – Set ordering

  • +
  • null_treatment – Indicate how nulls are to be treated

  • +
+
+
+
+
+_null_treatment = None
+
+ +
+
+_order_by = None
+
+ +
+
+_partition_by = None
+
+ +
+
+_window_frame = None
+
+ +
+ +
+
+class datafusion.expr.WindowFrame(units: str, start_bound: Any | None, end_bound: Any | None)
+

Defines a window frame for performing window operations.

+

Construct a window frame using the given parameters.

+
+
Parameters:
+
    +
  • units – Should be one of rows, range, or groups.

  • +
  • start_bound – Sets the preceding bound. Must be >= 0. If none, this +will be set to unbounded. If unit type is groups, this +parameter must be set.

  • +
  • end_bound – Sets the following bound. Must be >= 0. If none, this +will be set to unbounded. If unit type is groups, this +parameter must be set.

  • +
+
+
+
+
+__repr__() str
+

Print a string representation of the window frame.

+
+ +
+
+get_frame_units() str
+

Returns the window frame units for the bounds.

+
+ +
+
+get_lower_bound() WindowFrameBound
+

Returns starting bound.

+
+ +
+
+get_upper_bound() WindowFrameBound
+

Returns end bound.

+
+ +
+
+window_frame
+
+ +
+ +
+
+class datafusion.expr.WindowFrameBound(frame_bound: datafusion._internal.expr.WindowFrameBound)
+

Defines a single window frame bound.

+

WindowFrame typically requires a start and end bound.

+

Constructs a window frame bound.

+
+
+get_offset() int | None
+

Returns the offset of the window frame.

+
+ +
+
+is_current_row() bool
+

Returns if the frame bound is current row.

+
+ +
+
+is_following() bool
+

Returns if the frame bound is following.

+
+ +
+
+is_preceding() bool
+

Returns if the frame bound is preceding.

+
+ +
+
+is_unbounded() bool
+

Returns if the frame bound is unbounded.

+
+ +
+
+frame_bound
+
+ +
+ +
+
+datafusion.expr.ensure_expr(value: Expr | Any) datafusion._internal.expr.Expr
+

Return the internal expression from Expr or raise TypeError.

+

This helper rejects plain strings and other non-Expr values so +higher level APIs consistently require explicit col() or +lit() expressions.

+
+
Parameters:
+

value – Candidate expression or other object.

+
+
Returns:
+

The internal expression representation.

+
+
Raises:
+

TypeError – If value is not an instance of Expr.

+
+
+
+ +
+
+datafusion.expr.ensure_expr_list(exprs: collections.abc.Iterable[Expr | collections.abc.Iterable[Expr]]) list[datafusion._internal.expr.Expr]
+

Flatten an iterable of expressions, validating each via ensure_expr.

+
+
Parameters:
+

exprs – Possibly nested iterable containing expressions.

+
+
Returns:
+

A flat list of raw expressions.

+
+
Raises:
+

TypeError – If any item is not an instance of Expr.

+
+
+
+ +
+
+datafusion.expr.Aggregate
+
+ +
+
+datafusion.expr.AggregateFunction
+
+ +
+
+datafusion.expr.Alias
+
+ +
+
+datafusion.expr.Analyze
+
+ +
+
+datafusion.expr.Between
+
+ +
+
+datafusion.expr.BinaryExpr
+
+ +
+
+datafusion.expr.Case
+
+ +
+
+datafusion.expr.Cast
+
+ +
+
+datafusion.expr.Column
+
+ +
+
+datafusion.expr.CopyTo
+
+ +
+
+datafusion.expr.CreateCatalog
+
+ +
+
+datafusion.expr.CreateCatalogSchema
+
+ +
+
+datafusion.expr.CreateExternalTable
+
+ +
+
+datafusion.expr.CreateFunction
+
+ +
+
+datafusion.expr.CreateFunctionBody
+
+ +
+
+datafusion.expr.CreateIndex
+
+ +
+
+datafusion.expr.CreateMemoryTable
+
+ +
+
+datafusion.expr.CreateView
+
+ +
+
+datafusion.expr.Deallocate
+
+ +
+
+datafusion.expr.DescribeTable
+
+ +
+
+datafusion.expr.Distinct
+
+ +
+
+datafusion.expr.DmlStatement
+
+ +
+
+datafusion.expr.DropCatalogSchema
+
+ +
+
+datafusion.expr.DropFunction
+
+ +
+
+datafusion.expr.DropTable
+
+ +
+
+datafusion.expr.DropView
+
+ +
+
+datafusion.expr.EXPR_TYPE_ERROR = 'Use col()/column() or lit()/literal() to construct expressions'
+
+ +
+
+datafusion.expr.EmptyRelation
+
+ +
+
+datafusion.expr.Execute
+
+ +
+
+datafusion.expr.Exists
+
+ +
+
+datafusion.expr.Explain
+
+ +
+
+datafusion.expr.Extension
+
+ +
+
+datafusion.expr.FileType
+
+ +
+
+datafusion.expr.Filter
+
+ +
+
+datafusion.expr.GroupingSet
+
+ +
+
+datafusion.expr.ILike
+
+ +
+
+datafusion.expr.InList
+
+ +
+
+datafusion.expr.InSubquery
+
+ +
+
+datafusion.expr.IsFalse
+
+ +
+
+datafusion.expr.IsNotFalse
+
+ +
+
+datafusion.expr.IsNotNull
+
+ +
+
+datafusion.expr.IsNotTrue
+
+ +
+
+datafusion.expr.IsNotUnknown
+
+ +
+
+datafusion.expr.IsNull
+
+ +
+
+datafusion.expr.IsTrue
+
+ +
+
+datafusion.expr.IsUnknown
+
+ +
+
+datafusion.expr.Join
+
+ +
+
+datafusion.expr.JoinConstraint
+
+ +
+
+datafusion.expr.JoinType
+
+ +
+
+datafusion.expr.Like
+
+ +
+
+datafusion.expr.Limit
+
+ +
+
+datafusion.expr.Literal
+
+ +
+
+datafusion.expr.Negative
+
+ +
+
+datafusion.expr.Not
+
+ +
+
+datafusion.expr.OperateFunctionArg
+
+ +
+
+datafusion.expr.Partitioning
+
+ +
+
+datafusion.expr.Placeholder
+
+ +
+
+datafusion.expr.Prepare
+
+ +
+
+datafusion.expr.Projection
+
+ +
+
+datafusion.expr.RecursiveQuery
+
+ +
+
+datafusion.expr.Repartition
+
+ +
+
+datafusion.expr.ScalarSubquery
+
+ +
+
+datafusion.expr.ScalarVariable
+
+ +
+
+datafusion.expr.SetVariable
+
+ +
+
+datafusion.expr.SimilarTo
+
+ +
+
+datafusion.expr.Sort
+
+ +
+
+datafusion.expr.SortKey
+
+ +
+
+datafusion.expr.Subquery
+
+ +
+
+datafusion.expr.SubqueryAlias
+
+ +
+
+datafusion.expr.TableScan
+
+ +
+
+datafusion.expr.TransactionAccessMode
+
+ +
+
+datafusion.expr.TransactionConclusion
+
+ +
+
+datafusion.expr.TransactionEnd
+
+ +
+
+datafusion.expr.TransactionIsolationLevel
+
+ +
+
+datafusion.expr.TransactionStart
+
+ +
+
+datafusion.expr.TryCast
+
+ +
+
+datafusion.expr.Union
+
+ +
+
+datafusion.expr.Unnest
+
+ +
+
+datafusion.expr.UnnestExpr
+
+ +
+
+datafusion.expr.Values
+
+ +
+
+datafusion.expr.WindowExpr
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/functions/index.html b/autoapi/datafusion/functions/index.html new file mode 100644 index 000000000..3ca535bb4 --- /dev/null +++ b/autoapi/datafusion/functions/index.html @@ -0,0 +1,5745 @@ + + + + + + + + datafusion.functions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.functions

+

User functions for operating on Expr.

+
+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

abs(→ datafusion.expr.Expr)

Return the absolute value of a given number.

acos(→ datafusion.expr.Expr)

Returns the arc cosine or inverse cosine of a number.

acosh(→ datafusion.expr.Expr)

Returns inverse hyperbolic cosine.

alias(→ datafusion.expr.Expr)

Creates an alias expression with an optional metadata dictionary.

approx_distinct(→ datafusion.expr.Expr)

Returns the approximate number of distinct values.

approx_median(→ datafusion.expr.Expr)

Returns the approximate median value.

approx_percentile_cont(→ datafusion.expr.Expr)

Returns the value that is approximately at a given percentile of expr.

approx_percentile_cont_with_weight(→ datafusion.expr.Expr)

Returns the value of the weighted approximate percentile.

array(→ datafusion.expr.Expr)

Returns an array using the specified input expressions.

array_agg(→ datafusion.expr.Expr)

Aggregate values into an array.

array_append(→ datafusion.expr.Expr)

Appends an element to the end of an array.

array_cat(→ datafusion.expr.Expr)

Concatenates the input arrays.

array_concat(→ datafusion.expr.Expr)

Concatenates the input arrays.

array_dims(→ datafusion.expr.Expr)

Returns an array of the array's dimensions.

array_distinct(→ datafusion.expr.Expr)

Returns distinct values from the array after removing duplicates.

array_element(→ datafusion.expr.Expr)

Extracts the element with the index n from the array.

array_empty(→ datafusion.expr.Expr)

Returns a boolean indicating whether the array is empty.

array_except(→ datafusion.expr.Expr)

Returns the elements that appear in array1 but not in array2.

array_extract(→ datafusion.expr.Expr)

Extracts the element with the index n from the array.

array_has(→ datafusion.expr.Expr)

Returns true if the element appears in the first array, otherwise false.

array_has_all(→ datafusion.expr.Expr)

Determines if there is complete overlap of second_array in first_array.

array_has_any(→ datafusion.expr.Expr)

Determine if there is an overlap between first_array and second_array.

array_indexof(→ datafusion.expr.Expr)

Return the position of the first occurrence of element in array.

array_intersect(→ datafusion.expr.Expr)

Returns the intersection of array1 and array2.

array_join(→ datafusion.expr.Expr)

Converts each element to its text representation.

array_length(→ datafusion.expr.Expr)

Returns the length of the array.

array_ndims(→ datafusion.expr.Expr)

Returns the number of dimensions of the array.

array_pop_back(→ datafusion.expr.Expr)

Returns the array without the last element.

array_pop_front(→ datafusion.expr.Expr)

Returns the array without the first element.

array_position(→ datafusion.expr.Expr)

Return the position of the first occurrence of element in array.

array_positions(→ datafusion.expr.Expr)

Searches for an element in the array and returns all occurrences.

array_prepend(→ datafusion.expr.Expr)

Prepends an element to the beginning of an array.

array_push_back(→ datafusion.expr.Expr)

Appends an element to the end of an array.

array_push_front(→ datafusion.expr.Expr)

Prepends an element to the beginning of an array.

array_remove(→ datafusion.expr.Expr)

Removes the first element from the array equal to the given value.

array_remove_all(→ datafusion.expr.Expr)

Removes all elements from the array equal to the given value.

array_remove_n(→ datafusion.expr.Expr)

Removes the first max elements from the array equal to the given value.

array_repeat(→ datafusion.expr.Expr)

Returns an array containing element count times.

array_replace(→ datafusion.expr.Expr)

Replaces the first occurrence of from_val with to_val.

array_replace_all(→ datafusion.expr.Expr)

Replaces all occurrences of from_val with to_val.

array_replace_n(→ datafusion.expr.Expr)

Replace n occurrences of from_val with to_val.

array_resize(→ datafusion.expr.Expr)

Returns an array with the specified size filled.

array_slice(→ datafusion.expr.Expr)

Returns a slice of the array.

array_sort(→ datafusion.expr.Expr)

Sort an array.

array_to_string(→ datafusion.expr.Expr)

Converts each element to its text representation.

array_union(→ datafusion.expr.Expr)

Returns an array of the elements in the union of array1 and array2.

arrow_cast(→ datafusion.expr.Expr)

Casts an expression to a specified data type.

arrow_typeof(→ datafusion.expr.Expr)

Returns the Arrow type of the expression.

ascii(→ datafusion.expr.Expr)

Returns the numeric code of the first character of the argument.

asin(→ datafusion.expr.Expr)

Returns the arc sine or inverse sine of a number.

asinh(→ datafusion.expr.Expr)

Returns inverse hyperbolic sine.

atan(→ datafusion.expr.Expr)

Returns inverse tangent of a number.

atan2(→ datafusion.expr.Expr)

Returns inverse tangent of a division given in the argument.

atanh(→ datafusion.expr.Expr)

Returns inverse hyperbolic tangent.

avg(→ datafusion.expr.Expr)

Returns the average value.

bit_and(→ datafusion.expr.Expr)

Computes the bitwise AND of the argument.

bit_length(→ datafusion.expr.Expr)

Returns the number of bits in the string argument.

bit_or(→ datafusion.expr.Expr)

Computes the bitwise OR of the argument.

bit_xor(→ datafusion.expr.Expr)

Computes the bitwise XOR of the argument.

bool_and(→ datafusion.expr.Expr)

Computes the boolean AND of the argument.

bool_or(→ datafusion.expr.Expr)

Computes the boolean OR of the argument.

btrim(→ datafusion.expr.Expr)

Removes all characters, spaces by default, from both sides of a string.

cardinality(→ datafusion.expr.Expr)

Returns the total number of elements in the array.

case(→ datafusion.expr.CaseBuilder)

Create a case expression.

cbrt(→ datafusion.expr.Expr)

Returns the cube root of a number.

ceil(→ datafusion.expr.Expr)

Returns the nearest integer greater than or equal to the argument.

char_length(→ datafusion.expr.Expr)

The number of characters in the string.

character_length(→ datafusion.expr.Expr)

Returns the number of characters in the argument.

chr(→ datafusion.expr.Expr)

Converts the Unicode code point to a UTF8 character.

coalesce(→ datafusion.expr.Expr)

Returns the value of the first expr in args which is not NULL.

col(→ datafusion.expr.Expr)

Creates a column reference expression.

concat(→ datafusion.expr.Expr)

Concatenates the text representations of all the arguments.

concat_ws(→ datafusion.expr.Expr)

Concatenates the list args with the separator.

corr(→ datafusion.expr.Expr)

Returns the correlation coefficient between value1 and value2.

cos(→ datafusion.expr.Expr)

Returns the cosine of the argument.

cosh(→ datafusion.expr.Expr)

Returns the hyperbolic cosine of the argument.

cot(→ datafusion.expr.Expr)

Returns the cotangent of the argument.

count(→ datafusion.expr.Expr)

Returns the number of rows that match the given arguments.

count_star(→ datafusion.expr.Expr)

Create a COUNT(1) aggregate expression.

covar(→ datafusion.expr.Expr)

Computes the sample covariance.

covar_pop(→ datafusion.expr.Expr)

Computes the population covariance.

covar_samp(→ datafusion.expr.Expr)

Computes the sample covariance.

cume_dist(→ datafusion.expr.Expr)

Create a cumulative distribution window function.

current_date(→ datafusion.expr.Expr)

Returns current UTC date as a Date32 value.

current_time(→ datafusion.expr.Expr)

Returns current UTC time as a Time64 value.

date_bin(→ datafusion.expr.Expr)

Coerces an arbitrary timestamp to the start of the nearest specified interval.

date_part(→ datafusion.expr.Expr)

Extracts a subfield from the date.

date_trunc(→ datafusion.expr.Expr)

Truncates the date to a specified level of precision.

datepart(→ datafusion.expr.Expr)

Return a specified part of a date.

datetrunc(→ datafusion.expr.Expr)

Truncates the date to a specified level of precision.

decode(→ datafusion.expr.Expr)

Decode the input, using the encoding. encoding can be base64 or hex.

degrees(→ datafusion.expr.Expr)

Converts the argument from radians to degrees.

dense_rank(→ datafusion.expr.Expr)

Create a dense_rank window function.

digest(→ datafusion.expr.Expr)

Computes the binary hash of an expression using the specified algorithm.

empty(→ datafusion.expr.Expr)

This is an alias for array_empty().

encode(→ datafusion.expr.Expr)

Encode the input, using the encoding. encoding can be base64 or hex.

ends_with(→ datafusion.expr.Expr)

Returns true if the string ends with the suffix, false otherwise.

exp(→ datafusion.expr.Expr)

Returns the exponential of the argument.

extract(→ datafusion.expr.Expr)

Extracts a subfield from the date.

factorial(→ datafusion.expr.Expr)

Returns the factorial of the argument.

find_in_set(→ datafusion.expr.Expr)

Find a string in a list of strings.

first_value(→ datafusion.expr.Expr)

Returns the first value in a group of values.

flatten(→ datafusion.expr.Expr)

Flattens an array of arrays into a single array.

floor(→ datafusion.expr.Expr)

Returns the nearest integer less than or equal to the argument.

from_unixtime(→ datafusion.expr.Expr)

Converts an integer to RFC3339 timestamp format string.

gcd(→ datafusion.expr.Expr)

Returns the greatest common divisor.

in_list(→ datafusion.expr.Expr)

Returns whether the argument is contained within the list values.

initcap(→ datafusion.expr.Expr)

Set the initial letter of each word to capital.

isnan(→ datafusion.expr.Expr)

Returns true if a given number is +NaN or -NaN otherwise returns false.

iszero(→ datafusion.expr.Expr)

Returns true if a given number is +0.0 or -0.0 otherwise returns false.

lag(→ datafusion.expr.Expr)

Create a lag window function.

last_value(→ datafusion.expr.Expr)

Returns the last value in a group of values.

lcm(→ datafusion.expr.Expr)

Returns the least common multiple.

lead(→ datafusion.expr.Expr)

Create a lead window function.

left(→ datafusion.expr.Expr)

Returns the first n characters in the string.

length(→ datafusion.expr.Expr)

The number of characters in the string.

levenshtein(→ datafusion.expr.Expr)

Returns the Levenshtein distance between the two given strings.

list_append(→ datafusion.expr.Expr)

Appends an element to the end of an array.

list_cat(→ datafusion.expr.Expr)

Concatenates the input arrays.

list_concat(→ datafusion.expr.Expr)

Concatenates the input arrays.

list_dims(→ datafusion.expr.Expr)

Returns an array of the array's dimensions.

list_distinct(→ datafusion.expr.Expr)

Returns distinct values from the array after removing duplicates.

list_element(→ datafusion.expr.Expr)

Extracts the element with the index n from the array.

list_except(→ datafusion.expr.Expr)

Returns the elements that appear in array1 but not in array2.

list_extract(→ datafusion.expr.Expr)

Extracts the element with the index n from the array.

list_indexof(→ datafusion.expr.Expr)

Return the position of the first occurrence of element in array.

list_intersect(→ datafusion.expr.Expr)

Returns the intersection of array1 and array2.

list_join(→ datafusion.expr.Expr)

Converts each element to its text representation.

list_length(→ datafusion.expr.Expr)

Returns the length of the array.

list_ndims(→ datafusion.expr.Expr)

Returns the number of dimensions of the array.

list_position(→ datafusion.expr.Expr)

Return the position of the first occurrence of element in array.

list_positions(→ datafusion.expr.Expr)

Searches for an element in the array and returns all occurrences.

list_prepend(→ datafusion.expr.Expr)

Prepends an element to the beginning of an array.

list_push_back(→ datafusion.expr.Expr)

Appends an element to the end of an array.

list_push_front(→ datafusion.expr.Expr)

Prepends an element to the beginning of an array.

list_remove(→ datafusion.expr.Expr)

Removes the first element from the array equal to the given value.

list_remove_all(→ datafusion.expr.Expr)

Removes all elements from the array equal to the given value.

list_remove_n(→ datafusion.expr.Expr)

Removes the first max elements from the array equal to the given value.

list_repeat(→ datafusion.expr.Expr)

Returns an array containing element count times.

list_replace(→ datafusion.expr.Expr)

Replaces the first occurrence of from_val with to_val.

list_replace_all(→ datafusion.expr.Expr)

Replaces all occurrences of from_val with to_val.

list_replace_n(→ datafusion.expr.Expr)

Replace n occurrences of from_val with to_val.

list_resize(→ datafusion.expr.Expr)

Returns an array with the specified size filled.

list_slice(→ datafusion.expr.Expr)

Returns a slice of the array.

list_sort(→ datafusion.expr.Expr)

This is an alias for array_sort().

list_to_string(→ datafusion.expr.Expr)

Converts each element to its text representation.

list_union(→ datafusion.expr.Expr)

Returns an array of the elements in the union of array1 and array2.

ln(→ datafusion.expr.Expr)

Returns the natural logarithm (base e) of the argument.

log(→ datafusion.expr.Expr)

Returns the logarithm of a number for a particular base.

log10(→ datafusion.expr.Expr)

Base 10 logarithm of the argument.

log2(→ datafusion.expr.Expr)

Base 2 logarithm of the argument.

lower(→ datafusion.expr.Expr)

Converts a string to lowercase.

lpad(→ datafusion.expr.Expr)

Add left padding to a string.

ltrim(→ datafusion.expr.Expr)

Removes all characters, spaces by default, from the beginning of a string.

make_array(→ datafusion.expr.Expr)

Returns an array using the specified input expressions.

make_date(→ datafusion.expr.Expr)

Make a date from year, month and day component parts.

make_list(→ datafusion.expr.Expr)

Returns an array using the specified input expressions.

max(→ datafusion.expr.Expr)

Aggregate function that returns the maximum value of the argument.

md5(→ datafusion.expr.Expr)

Computes an MD5 128-bit checksum for a string expression.

mean(→ datafusion.expr.Expr)

Returns the average (mean) value of the argument.

median(→ datafusion.expr.Expr)

Computes the median of a set of numbers.

min(→ datafusion.expr.Expr)

Aggregate function that returns the minimum value of the argument.

named_struct(→ datafusion.expr.Expr)

Returns a struct with the given names and arguments pairs.

nanvl(→ datafusion.expr.Expr)

Returns x if x is not NaN. Otherwise returns y.

now(→ datafusion.expr.Expr)

Returns the current timestamp in nanoseconds.

nth_value(→ datafusion.expr.Expr)

Returns the n-th value in a group of values.

ntile(→ datafusion.expr.Expr)

Create an n-tile window function.

nullif(→ datafusion.expr.Expr)

Returns NULL if expr1 equals expr2; otherwise it returns expr1.

nvl(→ datafusion.expr.Expr)

Returns x if x is not NULL. Otherwise returns y.

octet_length(→ datafusion.expr.Expr)

Returns the number of bytes of a string.

order_by(→ datafusion.expr.SortExpr)

Creates a new sort expression.

overlay(→ datafusion.expr.Expr)

Replace a substring with a new substring.

percent_rank(→ datafusion.expr.Expr)

Create a percent_rank window function.

pi(→ datafusion.expr.Expr)

Returns an approximate value of π.

pow(→ datafusion.expr.Expr)

Returns base raised to the power of exponent.

power(→ datafusion.expr.Expr)

Returns base raised to the power of exponent.

radians(→ datafusion.expr.Expr)

Converts the argument from degrees to radians.

random(→ datafusion.expr.Expr)

Returns a random value in the range 0.0 <= x < 1.0.

range(→ datafusion.expr.Expr)

Create a list of values in the range between start and stop.

rank(→ datafusion.expr.Expr)

Create a rank window function.

regexp_count(→ datafusion.expr.Expr)

Returns the number of matches in a string.

regexp_instr(→ datafusion.expr.Expr)

Returns the position of a regular expression match in a string.

regexp_like(→ datafusion.expr.Expr)

Find if any regular expression (regex) matches exist.

regexp_match(→ datafusion.expr.Expr)

Perform regular expression (regex) matching.

regexp_replace(→ datafusion.expr.Expr)

Replaces substring(s) matching a PCRE-like regular expression.

regr_avgx(→ datafusion.expr.Expr)

Computes the average of the independent variable x.

regr_avgy(→ datafusion.expr.Expr)

Computes the average of the dependent variable y.

regr_count(→ datafusion.expr.Expr)

Counts the number of rows in which both expressions are not null.

regr_intercept(→ datafusion.expr.Expr)

Computes the intercept from the linear regression.

regr_r2(→ datafusion.expr.Expr)

Computes the R-squared value from linear regression.

regr_slope(→ datafusion.expr.Expr)

Computes the slope from linear regression.

regr_sxx(→ datafusion.expr.Expr)

Computes the sum of squares of the independent variable x.

regr_sxy(→ datafusion.expr.Expr)

Computes the sum of products of pairs of numbers.

regr_syy(→ datafusion.expr.Expr)

Computes the sum of squares of the dependent variable y.

repeat(→ datafusion.expr.Expr)

Repeats the string n times.

replace(→ datafusion.expr.Expr)

Replaces all occurrences of from_val with to_val in the string.

reverse(→ datafusion.expr.Expr)

Reverse the string argument.

right(→ datafusion.expr.Expr)

Returns the last n characters in the string.

round(→ datafusion.expr.Expr)

Round the argument to the nearest integer.

row_number(→ datafusion.expr.Expr)

Create a row number window function.

rpad(→ datafusion.expr.Expr)

Add right padding to a string.

rtrim(→ datafusion.expr.Expr)

Removes all characters, spaces by default, from the end of a string.

sha224(→ datafusion.expr.Expr)

Computes the SHA-224 hash of a binary string.

sha256(→ datafusion.expr.Expr)

Computes the SHA-256 hash of a binary string.

sha384(→ datafusion.expr.Expr)

Computes the SHA-384 hash of a binary string.

sha512(→ datafusion.expr.Expr)

Computes the SHA-512 hash of a binary string.

signum(→ datafusion.expr.Expr)

Returns the sign of the argument (-1, 0, +1).

sin(→ datafusion.expr.Expr)

Returns the sine of the argument.

sinh(→ datafusion.expr.Expr)

Returns the hyperbolic sine of the argument.

split_part(→ datafusion.expr.Expr)

Split a string and return one part.

sqrt(→ datafusion.expr.Expr)

Returns the square root of the argument.

starts_with(→ datafusion.expr.Expr)

Returns true if string starts with prefix.

stddev(→ datafusion.expr.Expr)

Computes the standard deviation of the argument.

stddev_pop(→ datafusion.expr.Expr)

Computes the population standard deviation of the argument.

stddev_samp(→ datafusion.expr.Expr)

Computes the sample standard deviation of the argument.

string_agg(→ datafusion.expr.Expr)

Concatenates the input strings.

strpos(→ datafusion.expr.Expr)

Finds the position from where the substring matches the string.

struct(→ datafusion.expr.Expr)

Returns a struct with the given arguments.

substr(→ datafusion.expr.Expr)

Substring from the position to the end.

substr_index(→ datafusion.expr.Expr)

Returns an indexed substring.

substring(→ datafusion.expr.Expr)

Substring from the position with length characters.

sum(→ datafusion.expr.Expr)

Computes the sum of a set of numbers.

tan(→ datafusion.expr.Expr)

Returns the tangent of the argument.

tanh(→ datafusion.expr.Expr)

Returns the hyperbolic tangent of the argument.

to_hex(→ datafusion.expr.Expr)

Converts an integer to a hexadecimal string.

to_timestamp(→ datafusion.expr.Expr)

Converts a string and optional formats to a Timestamp in nanoseconds.

to_timestamp_micros(→ datafusion.expr.Expr)

Converts a string and optional formats to a Timestamp in microseconds.

to_timestamp_millis(→ datafusion.expr.Expr)

Converts a string and optional formats to a Timestamp in milliseconds.

to_timestamp_nanos(→ datafusion.expr.Expr)

Converts a string and optional formats to a Timestamp in nanoseconds.

to_timestamp_seconds(→ datafusion.expr.Expr)

Converts a string and optional formats to a Timestamp in seconds.

to_unixtime(→ datafusion.expr.Expr)

Converts a string and optional formats to a Unixtime.

translate(→ datafusion.expr.Expr)

Replaces the characters in from_val with the counterpart in to_val.

trim(→ datafusion.expr.Expr)

Removes all characters, spaces by default, from both sides of a string.

trunc(→ datafusion.expr.Expr)

Truncate the number toward zero with optional precision.

upper(→ datafusion.expr.Expr)

Converts a string to uppercase.

uuid(→ datafusion.expr.Expr)

Returns uuid v4 as a string value.

var(→ datafusion.expr.Expr)

Computes the sample variance of the argument.

var_pop(→ datafusion.expr.Expr)

Computes the population variance of the argument.

var_samp(→ datafusion.expr.Expr)

Computes the sample variance of the argument.

var_sample(→ datafusion.expr.Expr)

Computes the sample variance of the argument.

when(→ datafusion.expr.CaseBuilder)

Create a case expression that has no base expression.

window(→ datafusion.expr.Expr)

Creates a new Window function expression.

+
+
+

Module Contents

+
+
+datafusion.functions.abs(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Return the absolute value of a given number.

+
+

Returns:

+
+
Expr

A new expression representing the absolute value of the input expression.

+
+
+
+
+ +
+
+datafusion.functions.acos(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the arc cosine or inverse cosine of a number.

+
+

Returns:

+
+
Expr

A new expression representing the arc cosine of the input expression.

+
+
+
+
+ +
+
+datafusion.functions.acosh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns inverse hyperbolic cosine.

+
+ +
+
+datafusion.functions.alias(expr: datafusion.expr.Expr, name: str, metadata: dict[str, str] | None = None) datafusion.expr.Expr
+

Creates an alias expression with an optional metadata dictionary.

+
+
Parameters:
+
    +
  • expr – The expression to alias

  • +
  • name – The alias name

  • +
  • metadata – Optional metadata to attach to the column

  • +
+
+
Returns:
+

An expression with the given alias

+
+
+
+ +
+
+datafusion.functions.approx_distinct(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the approximate number of distinct values.

+

This aggregate function is similar to count() with distinct set, but it +will approximate the number of distinct entries. It may return significantly faster +than count() for some DataFrames.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Values to check for distinct entries

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.approx_median(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the approximate median value.

+

This aggregate function is similar to median(), but it will only +approximate the median. It may return significantly faster for some DataFrames.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Values to find the median for

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.approx_percentile_cont(sort_expression: datafusion.expr.Expr | datafusion.expr.SortExpr, percentile: float, num_centroids: int | None = None, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the value that is approximately at a given percentile of expr.

+

This aggregate function assumes the input values form a continuous distribution. +Suppose you have a DataFrame which consists of 100 different test scores. If you +called this function with a percentile of 0.9, it would return the value of the +test score that is above 90% of the other test scores. The returned value may be +between two of the values.

+

This function uses the [t-digest](https://arxiv.org/abs/1902.04023) algorithm to +compute the percentile. You can limit the number of bins used in this algorithm by +setting the num_centroids parameter.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • sort_expression – Values for which to find the approximate percentile

  • +
  • percentile – This must be between 0.0 and 1.0, inclusive

  • +
  • num_centroids – Max bin size for the t-digest algorithm

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.approx_percentile_cont_with_weight(sort_expression: datafusion.expr.Expr | datafusion.expr.SortExpr, weight: datafusion.expr.Expr, percentile: float, num_centroids: int | None = None, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the value of the weighted approximate percentile.

+

This aggregate function is similar to approx_percentile_cont() except that +it uses the associated weights.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • sort_expression – Values for which to find the approximate percentile

  • +
  • weight – Relative weight for each of the values in expression

  • +
  • percentile – This must be between 0.0 and 1.0, inclusive

  • +
  • num_centroids – Max bin size for the t-digest algorithm

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.array(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array using the specified input expressions.

+

This is an alias for make_array().

+
+ +
+
+datafusion.functions.array_agg(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Aggregate values into an array.

+

Currently distinct and order_by cannot be used together. As a workaround, +consider array_sort() after aggregation. +[Issue Tracker](https://github.com/apache/datafusion/issues/12371)

+

If using the builder functions described in ref:_aggregation this function ignores +the option null_treatment.

+
+
Parameters:
+
    +
  • expression – Values to combine into an array

  • +
  • distinct – If True, a single entry for each distinct value will be in the result

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
  • order_by – Order the resultant array values. Accepts column names or expressions.

  • +
+
+
+

For example:

+
df.aggregate([], array_agg(col("a"), order_by="b"))
+
+
+
+ +
+
+datafusion.functions.array_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Appends an element to the end of an array.

+
+ +
+
+datafusion.functions.array_cat(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the input arrays.

+

This is an alias for array_concat().

+
+ +
+
+datafusion.functions.array_concat(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the input arrays.

+
+ +
+
+datafusion.functions.array_dims(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array of the array’s dimensions.

+
+ +
+
+datafusion.functions.array_distinct(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns distinct values from the array after removing duplicates.

+
+ +
+
+datafusion.functions.array_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts the element with the index n from the array.

+
+ +
+
+datafusion.functions.array_empty(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns a boolean indicating whether the array is empty.

+
+ +
+
+datafusion.functions.array_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the elements that appear in array1 but not in array2.

+
+ +
+
+datafusion.functions.array_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts the element with the index n from the array.

+

This is an alias for array_element().

+
+ +
+
+datafusion.functions.array_has(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns true if the element appears in the first array, otherwise false.

+
+ +
+
+datafusion.functions.array_has_all(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) datafusion.expr.Expr
+

Determines whether every element of second_array is contained in first_array.

+

Returns true if each element of the second array appears in the first array. +Otherwise, it returns false.

+
+ +
+
+datafusion.functions.array_has_any(first_array: datafusion.expr.Expr, second_array: datafusion.expr.Expr) datafusion.expr.Expr
+

Determine if there is an overlap between first_array and second_array.

+

Returns true if at least one element of the second array appears in the first +array. Otherwise, it returns false.

+
+ +
+
+datafusion.functions.array_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) datafusion.expr.Expr
+

Return the position of the first occurrence of element in array.

+

This is an alias for array_position().

+
+ +
+
+datafusion.functions.array_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the intersection of array1 and array2.

+
+ +
+
+datafusion.functions.array_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts each element to its text representation.

+

This is an alias for array_to_string().

+
+ +
+
+datafusion.functions.array_length(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the length of the array.

+
+ +
+
+datafusion.functions.array_ndims(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the number of dimensions of the array.

+
+ +
+
+datafusion.functions.array_pop_back(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the array without the last element.

+
+ +
+
+datafusion.functions.array_pop_front(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the array without the first element.

+
+ +
+
+datafusion.functions.array_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) datafusion.expr.Expr
+

Return the position of the first occurrence of element in array.

+
+ +
+
+datafusion.functions.array_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Searches for an element in the array and returns all occurrences.

+
+ +
+
+datafusion.functions.array_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) datafusion.expr.Expr
+

Prepends an element to the beginning of an array.

+
+ +
+
+datafusion.functions.array_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Appends an element to the end of an array.

+

This is an alias for array_append().

+
+ +
+
+datafusion.functions.array_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) datafusion.expr.Expr
+

Prepends an element to the beginning of an array.

+

This is an alias for array_prepend().

+
+ +
+
+datafusion.functions.array_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes the first element from the array equal to the given value.

+
+ +
+
+datafusion.functions.array_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all elements from the array equal to the given value.

+
+ +
+
+datafusion.functions.array_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes the first max elements from the array equal to the given value.

+
+ +
+
+datafusion.functions.array_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array containing element count times.

+
+ +
+
+datafusion.functions.array_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces the first occurrence of from_val with to_val.

+
+ +
+
+datafusion.functions.array_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces all occurrences of from_val with to_val.

+
+ +
+
+datafusion.functions.array_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) datafusion.expr.Expr
+

Replace n occurrences of from_val with to_val.

+

Replaces the first max occurrences of the specified element with another +specified element.

+
+ +
+
+datafusion.functions.array_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array with the specified size filled.

+

If size is greater than the array length, the additional entries will +be filled with the given value.

+
+ +
+
+datafusion.functions.array_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns a slice of the array.

+
+ +
+
+datafusion.functions.array_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) datafusion.expr.Expr
+

Sort an array.

+
+
Parameters:
+
    +
  • array – The input array to sort.

  • +
  • descending – If True, sorts in descending order.

  • +
  • null_first – If True, nulls will be returned at the beginning of the array.

  • +
+
+
+
+ +
+
+datafusion.functions.array_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts each element to its text representation.

+
+ +
+
+datafusion.functions.array_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array of the elements in the union of array1 and array2.

+

Duplicate rows will not be returned.

+
+ +
+
+datafusion.functions.arrow_cast(expr: datafusion.expr.Expr, data_type: datafusion.expr.Expr) datafusion.expr.Expr
+

Casts an expression to a specified data type.

+
+ +
+
+datafusion.functions.arrow_typeof(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the Arrow type of the expression.

+
+ +
+
+datafusion.functions.ascii(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the numeric code of the first character of the argument.

+
+ +
+
+datafusion.functions.asin(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the arc sine or inverse sine of a number.

+
+ +
+
+datafusion.functions.asinh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns inverse hyperbolic sine.

+
+ +
+
+datafusion.functions.atan(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns inverse tangent of a number.

+
+ +
+
+datafusion.functions.atan2(y: datafusion.expr.Expr, x: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns inverse tangent of a division given in the argument.

+
+ +
+
+datafusion.functions.atanh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns inverse hyperbolic tangent.

+
+ +
+
+datafusion.functions.avg(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the average value.

+

This aggregate function expects a numeric expression and will return a float.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Values to compute the average of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.bit_and(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the bitwise AND of the argument.

+

This aggregate function will bitwise compare every value in the input partition.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Argument to perform bitwise calculation on

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.bit_length(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the number of bits in the string argument.

+
+ +
+
+datafusion.functions.bit_or(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the bitwise OR of the argument.

+

This aggregate function will bitwise compare every value in the input partition.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Argument to perform bitwise calculation on

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.bit_xor(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the bitwise XOR of the argument.

+

This aggregate function will bitwise compare every value in the input partition.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by and null_treatment.

+
+
Parameters:
+
    +
  • expression – Argument to perform bitwise calculation on

  • +
  • distinct – If True, evaluate each unique value of expression only once

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.bool_and(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the boolean AND of the argument.

+

This aggregate function will compare every value in the input partition. These are +expected to be boolean values.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Argument to perform calculation on

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.bool_or(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the boolean OR of the argument.

+

This aggregate function will compare every value in the input partition. These are +expected to be boolean values.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – Argument to perform calculation on

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.btrim(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+datafusion.functions.cardinality(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the total number of elements in the array.

+
+ +
+
+datafusion.functions.case(expr: datafusion.expr.Expr) datafusion.expr.CaseBuilder
+

Create a case expression.

+

Create a CaseBuilder to match cases for the +expression expr. See CaseBuilder for +detailed usage.

+
+ +
+
+datafusion.functions.cbrt(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the cube root of a number.

+
+ +
+
+datafusion.functions.ceil(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the nearest integer greater than or equal to argument.

+
+ +
+
+datafusion.functions.char_length(string: datafusion.expr.Expr) datafusion.expr.Expr
+

The number of characters in the string.

+
+ +
+
+datafusion.functions.character_length(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the number of characters in the argument.

+
+ +
+
+datafusion.functions.chr(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts the Unicode code point to a UTF8 character.

+
+ +
+
+datafusion.functions.coalesce(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the value of the first expr in args which is not NULL.

+
+ +
+
+datafusion.functions.col(name: str) datafusion.expr.Expr
+

Creates a column reference expression.

+
+ +
+
+datafusion.functions.concat(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the text representations of all the arguments.

+

NULL arguments are ignored.

+
+ +
+
+datafusion.functions.concat_ws(separator: str, *args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the list args with the separator.

+

NULL arguments are ignored. separator should not be NULL.

+
+ +
+
+datafusion.functions.corr(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the correlation coefficient between value_y and value_x.

+

This aggregate function expects both values to be numeric and will return a float.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • value_y – The dependent variable for correlation

  • +
  • value_x – The independent variable for correlation

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.cos(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the cosine of the argument.

+
+ +
+
+datafusion.functions.cosh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the hyperbolic cosine of the argument.

+
+ +
+
+datafusion.functions.cot(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the cotangent of the argument.

+
+ +
+
+datafusion.functions.count(expressions: datafusion.expr.Expr | list[datafusion.expr.Expr] | None = None, distinct: bool = False, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the number of rows that match the given arguments.

+

This aggregate function will count the non-null rows provided in the expression.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by and null_treatment.

+
+
Parameters:
+
    +
  • expressions – Expression or list of expressions to count

  • +
  • distinct – If True, a single entry for each distinct value will be in the result

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.count_star(filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Create a COUNT(1) aggregate expression.

+

This aggregate function will count all of the rows in the partition.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, distinct, and null_treatment.

+
+
Parameters:
+

filter – If provided, only count rows for which the filter is True

+
+
+
+ +
+
+datafusion.functions.covar(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample covariance.

+

This is an alias for covar_samp().

+
+ +
+
+datafusion.functions.covar_pop(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the population covariance.

+

This aggregate function expects both values to be numeric and will return a float.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • value_y – The dependent variable for covariance

  • +
  • value_x – The independent variable for covariance

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.covar_samp(value_y: datafusion.expr.Expr, value_x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample covariance.

+

This aggregate function expects both values to be numeric and will return a float.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • value_y – The dependent variable for covariance

  • +
  • value_x – The independent variable for covariance

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.cume_dist(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a cumulative distribution window function.

+

This window function is similar to rank() except that the returned values +are the ratio of the row number to the total number of rows. Here is an example of a +dataframe with a window ordered by descending points and the associated +cumulative distribution:

+
+--------+-----------+
+| points | cume_dist |
++--------+-----------+
+| 100    | 0.5       |
+| 100    | 0.5       |
+| 50     | 0.75      |
+| 25     | 1.0       |
++--------+-----------+
+
+
+
+
Parameters:
+
    +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
cume_dist(order_by="points")
+
+
+
+ +
+
+datafusion.functions.current_date() datafusion.expr.Expr
+

Returns current UTC date as a Date32 value.

+
+ +
+
+datafusion.functions.current_time() datafusion.expr.Expr
+

Returns current UTC time as a Time64 value.

+
+ +
+
+datafusion.functions.date_bin(stride: datafusion.expr.Expr, source: datafusion.expr.Expr, origin: datafusion.expr.Expr) datafusion.expr.Expr
+

Coerces an arbitrary timestamp to the start of the nearest specified interval.

+
+ +
+
+datafusion.functions.date_part(part: datafusion.expr.Expr, date: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts a subfield from the date.

+
+ +
+
+datafusion.functions.date_trunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) datafusion.expr.Expr
+

Truncates the date to a specified level of precision.

+
+ +
+
+datafusion.functions.datepart(part: datafusion.expr.Expr, date: datafusion.expr.Expr) datafusion.expr.Expr
+

Return a specified part of a date.

+

This is an alias for date_part().

+
+ +
+
+datafusion.functions.datetrunc(part: datafusion.expr.Expr, date: datafusion.expr.Expr) datafusion.expr.Expr
+

Truncates the date to a specified level of precision.

+

This is an alias for date_trunc().

+
+ +
+
+datafusion.functions.decode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) datafusion.expr.Expr
+

Decode the input, using the encoding. encoding can be base64 or hex.

+
+ +
+
+datafusion.functions.degrees(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts the argument from radians to degrees.

+
+ +
+
+datafusion.functions.dense_rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a dense_rank window function.

+

This window function is similar to rank() except that the returned values +will be consecutive. Here is an example of a dataframe with a window ordered by +descending points and the associated dense rank:

+
+--------+------------+
+| points | dense_rank |
++--------+------------+
+| 100    | 1          |
+| 100    | 1          |
+| 50     | 2          |
+| 25     | 3          |
++--------+------------+
+
+
+
+
Parameters:
+
    +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
dense_rank(order_by="points")
+
+
+
+ +
+
+datafusion.functions.digest(value: datafusion.expr.Expr, method: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes the binary hash of an expression using the specified algorithm.

+

Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, +blake2b, and blake3.

+
+ +
+
+datafusion.functions.empty(array: datafusion.expr.Expr) datafusion.expr.Expr
+

This is an alias for array_empty().

+
+ +
+
+datafusion.functions.encode(expr: datafusion.expr.Expr, encoding: datafusion.expr.Expr) datafusion.expr.Expr
+

Encode the input, using the encoding. encoding can be base64 or hex.

+
+ +
+
+datafusion.functions.ends_with(arg: datafusion.expr.Expr, suffix: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns true if the string ends with the suffix, false otherwise.

+
+ +
+
+datafusion.functions.exp(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the exponential of the argument.

+
+ +
+
+datafusion.functions.extract(part: datafusion.expr.Expr, date: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts a subfield from the date.

+

This is an alias for date_part().

+
+ +
+
+datafusion.functions.factorial(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the factorial of the argument.

+
+ +
+
+datafusion.functions.find_in_set(string: datafusion.expr.Expr, string_list: datafusion.expr.Expr) datafusion.expr.Expr
+

Find a string in a list of strings.

+

Returns a value in the range of 1 to N if the string is in the string list +string_list consisting of N substrings.

+

The string list is a string composed of substrings separated by , characters.

+
+ +
+
+datafusion.functions.first_value(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) datafusion.expr.Expr
+

Returns the first value in a group of values.

+

This aggregate function will return the first value in the partition.

+

If using the builder functions described in the aggregation documentation, this function ignores +the option distinct.

+
+
Parameters:
+
    +
  • expression – Expression to return the first value of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
  • order_by – Set the ordering of the expression to evaluate. Accepts +column names or expressions.

  • +
  • null_treatment – Assign whether to respect or ignore null values.

  • +
+
+
+

For example:

+
df.aggregate([], first_value(col("a"), order_by="ts"))
+
+
+
+ +
+
+datafusion.functions.flatten(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Flattens an array of arrays into a single array.

+
+ +
+
+datafusion.functions.floor(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the nearest integer less than or equal to the argument.

+
+ +
+
+datafusion.functions.from_unixtime(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts an integer to RFC3339 timestamp format string.

+
+ +
+
+datafusion.functions.gcd(x: datafusion.expr.Expr, y: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the greatest common divisor.

+
+ +
+
+datafusion.functions.in_list(arg: datafusion.expr.Expr, values: list[datafusion.expr.Expr], negated: bool = False) datafusion.expr.Expr
+

Returns whether the argument is contained within the list values.

+
+ +
+
+datafusion.functions.initcap(string: datafusion.expr.Expr) datafusion.expr.Expr
+

Set the initial letter of each word to capital.

+

Converts the first letter of each word in string to uppercase and the remaining +characters to lowercase.

+
+ +
+
+datafusion.functions.isnan(expr: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns true if a given number is +NaN or -NaN otherwise returns false.

+
+ +
+
+datafusion.functions.iszero(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns true if a given number is +0.0 or -0.0 otherwise returns false.

+
+ +
+
+datafusion.functions.lag(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Any | None = None, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a lag window function.

+

Lag operation will return the argument that is in the previous shift_offset-th row +in the partition. For example lag(col("b"), shift_offset=3, default_value=5) +will return the 3rd previous value in column b. At the beginning of the +partition, where no values can be returned it will return the default value of 5.

+

Here is an example of both the lag and datafusion.functions.lead() +functions on a simple DataFrame:

+
+--------+------+-----+
+| points | lead | lag |
++--------+------+-----+
+| 100    | 100  |     |
+| 100    | 50   | 100 |
+| 50     | 25   | 100 |
+| 25     |      | 50  |
++--------+------+-----+
+
+
+
+
Parameters:
+
    +
  • arg – Value to return

  • +
  • shift_offset – Number of rows before the current row.

  • +
  • default_value – Value to return if shift_offset row does not exist.

  • +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
lag(col("b"), order_by="ts")
+
+
+
+ +
+
+datafusion.functions.last_value(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) datafusion.expr.Expr
+

Returns the last value in a group of values.

+

This aggregate function will return the last value in the partition.

+

If using the builder functions described in the aggregation documentation, this function ignores +the option distinct.

+
+
Parameters:
+
    +
  • expression – Expression to return the last value of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
  • order_by – Set the ordering of the expression to evaluate. Accepts +column names or expressions.

  • +
  • null_treatment – Assign whether to respect or ignore null values.

  • +
+
+
+

For example:

+
df.aggregate([], last_value(col("a"), order_by="ts"))
+
+
+
+ +
+
+datafusion.functions.lcm(x: datafusion.expr.Expr, y: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the least common multiple.

+
+ +
+
+datafusion.functions.lead(arg: datafusion.expr.Expr, shift_offset: int = 1, default_value: Any | None = None, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a lead window function.

+

Lead operation will return the argument that is in the next shift_offset-th row in +the partition. For example lead(col("b"), shift_offset=3, default_value=5) will +return the 3rd following value in column b. At the end of the partition, where +no further values can be returned it will return the default value of 5.

+

Here is an example of both the lead and datafusion.functions.lag() +functions on a simple DataFrame:

+
+--------+------+-----+
+| points | lead | lag |
++--------+------+-----+
+| 100    | 100  |     |
+| 100    | 50   | 100 |
+| 50     | 25   | 100 |
+| 25     |      | 50  |
++--------+------+-----+
+
+
+

To set window function parameters, use the window builder approach described in the +window functions online documentation.

+
+
Parameters:
+
    +
  • arg – Value to return

  • +
  • shift_offset – Number of rows following the current row.

  • +
  • default_value – Value to return if shift_offset row does not exist.

  • +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
lead(col("b"), order_by="ts")
+
+
+
+ +
+
+datafusion.functions.left(string: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the first n characters in the string.

+
+ +
+
+datafusion.functions.length(string: datafusion.expr.Expr) datafusion.expr.Expr
+

The number of characters in the string.

+
+ +
+
+datafusion.functions.levenshtein(string1: datafusion.expr.Expr, string2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the Levenshtein distance between the two given strings.

+
+ +
+
+datafusion.functions.list_append(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Appends an element to the end of an array.

+

This is an alias for array_append().

+
+ +
+
+datafusion.functions.list_cat(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the input arrays.

+

This is an alias for array_concat(), array_cat().

+
+ +
+
+datafusion.functions.list_concat(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Concatenates the input arrays.

+

This is an alias for array_concat(), array_cat().

+
+ +
+
+datafusion.functions.list_dims(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array of the array’s dimensions.

+

This is an alias for array_dims().

+
+ +
+
+datafusion.functions.list_distinct(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns distinct values from the array after removing duplicates.

+

This is an alias for array_distinct().

+
+ +
+
+datafusion.functions.list_element(array: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts the element with the index n from the array.

+

This is an alias for array_element().

+
+ +
+
+datafusion.functions.list_except(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the elements that appear in array1 but not in the array2.

+

This is an alias for array_except().

+
+ +
+
+datafusion.functions.list_extract(array: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Extracts the element with the index n from the array.

+

This is an alias for array_element().

+
+ +
+
+datafusion.functions.list_indexof(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) datafusion.expr.Expr
+

Return the position of the first occurrence of element in array.

+

This is an alias for array_position().

+
+ +
+
+datafusion.functions.list_intersect(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the intersection of array1 and array2.

+

This is an alias for array_intersect().

+
+ +
+
+datafusion.functions.list_join(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts each element to its text representation.

+

This is an alias for array_to_string().

+
+ +
+
+datafusion.functions.list_length(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the length of the array.

+

This is an alias for array_length().

+
+ +
+
+datafusion.functions.list_ndims(array: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the number of dimensions of the array.

+

This is an alias for array_ndims().

+
+ +
+
+datafusion.functions.list_position(array: datafusion.expr.Expr, element: datafusion.expr.Expr, index: int | None = 1) datafusion.expr.Expr
+

Return the position of the first occurrence of element in array.

+

This is an alias for array_position().

+
+ +
+
+datafusion.functions.list_positions(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Searches for an element in the array and returns all occurrences.

+

This is an alias for array_positions().

+
+ +
+
+datafusion.functions.list_prepend(element: datafusion.expr.Expr, array: datafusion.expr.Expr) datafusion.expr.Expr
+

Prepends an element to the beginning of an array.

+

This is an alias for array_prepend().

+
+ +
+
+datafusion.functions.list_push_back(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Appends an element to the end of an array.

+

This is an alias for array_append().

+
+ +
+
+datafusion.functions.list_push_front(element: datafusion.expr.Expr, array: datafusion.expr.Expr) datafusion.expr.Expr
+

Prepends an element to the beginning of an array.

+

This is an alias for array_prepend().

+
+ +
+
+datafusion.functions.list_remove(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes the first element from the array equal to the given value.

+

This is an alias for array_remove().

+
+ +
+
+datafusion.functions.list_remove_all(array: datafusion.expr.Expr, element: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all elements from the array equal to the given value.

+

This is an alias for array_remove_all().

+
+ +
+
+datafusion.functions.list_remove_n(array: datafusion.expr.Expr, element: datafusion.expr.Expr, max: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes the first max elements from the array equal to the given value.

+

This is an alias for array_remove_n().

+
+ +
+
+datafusion.functions.list_repeat(element: datafusion.expr.Expr, count: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array containing element count times.

+

This is an alias for array_repeat().

+
+ +
+
+datafusion.functions.list_replace(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces the first occurrence of from_val with to_val.

+

This is an alias for array_replace().

+
+ +
+
+datafusion.functions.list_replace_all(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces all occurrences of from_val with to_val.

+

This is an alias for array_replace_all().

+
+ +
+
+datafusion.functions.list_replace_n(array: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr, max: datafusion.expr.Expr) datafusion.expr.Expr
+

Replace n occurrences of from_val with to_val.

+

Replaces the first max occurrences of the specified element with another +specified element.

+

This is an alias for array_replace_n().

+
+ +
+
+datafusion.functions.list_resize(array: datafusion.expr.Expr, size: datafusion.expr.Expr, value: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array with the specified size filled.

+

If size is greater than the array length, the additional entries will be +filled with the given value. This is an alias for array_resize().

+
+ +
+
+datafusion.functions.list_slice(array: datafusion.expr.Expr, begin: datafusion.expr.Expr, end: datafusion.expr.Expr, stride: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns a slice of the array.

+

This is an alias for array_slice().

+
+ +
+
+datafusion.functions.list_sort(array: datafusion.expr.Expr, descending: bool = False, null_first: bool = False) datafusion.expr.Expr
+

This is an alias for array_sort().

+
+ +
+
+datafusion.functions.list_to_string(expr: datafusion.expr.Expr, delimiter: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts each element to its text representation.

+

This is an alias for array_to_string().

+
+ +
+
+datafusion.functions.list_union(array1: datafusion.expr.Expr, array2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array of the elements in the union of array1 and array2.

+

Duplicate rows will not be returned.

+

This is an alias for array_union().

+
+ +
+
+datafusion.functions.ln(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the natural logarithm (base e) of the argument.

+
+ +
+
+datafusion.functions.log(base: datafusion.expr.Expr, num: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the logarithm of a number for a particular base.

+
+ +
+
+datafusion.functions.log10(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Base 10 logarithm of the argument.

+
+ +
+
+datafusion.functions.log2(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Base 2 logarithm of the argument.

+
+ +
+
+datafusion.functions.lower(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string to lowercase.

+
+ +
+
+datafusion.functions.lpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Add left padding to a string.

+

Extends the string to length count by prepending the fill characters given in characters (a +space by default). If the string is already longer than count then it is +truncated (on the right).

+
+ +
+
+datafusion.functions.ltrim(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all characters, spaces by default, from the beginning of a string.

+
+ +
+
+datafusion.functions.make_array(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array using the specified input expressions.

+
+ +
+
+datafusion.functions.make_date(year: datafusion.expr.Expr, month: datafusion.expr.Expr, day: datafusion.expr.Expr) datafusion.expr.Expr
+

Make a date from year, month and day component parts.

+
+ +
+
+datafusion.functions.make_list(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an array using the specified input expressions.

+

This is an alias for make_array().

+
+ +
+
+datafusion.functions.max(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Aggregate function that returns the maximum value of the argument.

+

If using the builder functions described in the aggregation documentation, this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The value to find the maximum of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.md5(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes an MD5 128-bit checksum for a string expression.

+
+ +
+
+datafusion.functions.mean(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the average (mean) value of the argument.

+

This is an alias for avg().

+
+ +
+
+datafusion.functions.median(expression: datafusion.expr.Expr, distinct: bool = False, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the median of a set of numbers.

+

This aggregate function returns the median value of the expression for the given +aggregate function.

+

If using the builder functions described in the aggregation documentation, this function ignores +the options order_by and null_treatment.

+
+
Parameters:
+
    +
  • expression – The value to compute the median of

  • +
  • distinct – If True, a single entry for each distinct value will be in the result

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.min(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Aggregate function that returns the minimum value of the argument.

+

If using the builder functions described in the aggregation documentation, this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The value to find the minimum of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.named_struct(name_pairs: list[tuple[str, datafusion.expr.Expr]]) datafusion.expr.Expr
+

Returns a struct with the given names and arguments pairs.

+
+ +
+
+datafusion.functions.nanvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns x if x is not NaN. Otherwise returns y.

+
+ +
+
+datafusion.functions.now() datafusion.expr.Expr
+

Returns the current timestamp in nanoseconds.

+

This will use the same value for all instances of now() in the same statement.

+
+ +
+
+datafusion.functions.nth_value(expression: datafusion.expr.Expr, n: int, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, null_treatment: datafusion.common.NullTreatment = NullTreatment.RESPECT_NULLS) datafusion.expr.Expr
+

Returns the n-th value in a group of values.

+

This aggregate function will return the n-th value in the partition.

+

If using the builder functions described in the aggregation documentation, this function ignores +the option distinct.

+
+
Parameters:
+
    +
  • expression – Expression to return the n-th value of

  • +
  • n – Index of value to return. Starts at 1.

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
  • order_by – Set the ordering of the expression to evaluate. Accepts +column names or expressions.

  • +
  • null_treatment – Assign whether to respect or ignore null values.

  • +
+
+
+

For example:

+
df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
+
+
+
+ +
+
+datafusion.functions.ntile(groups: int, partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a n-tile window function.

+

This window function orders the window frame into a given number of groups based on +the ordering criteria. It then returns which group the current row is assigned to. +Here is an example of a dataframe with a window ordered by descending points +and the associated n-tile function:

+
+--------+-------+
+| points | ntile |
++--------+-------+
+| 120    | 1     |
+| 100    | 1     |
+| 80     | 2     |
+| 60     | 2     |
+| 40     | 3     |
+| 20     | 3     |
++--------+-------+
+
+
+
+
Parameters:
+
    +
  • groups – Number of groups for the n-tile to be divided into.

  • +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
ntile(3, order_by="points")
+
+
+
+ +
+
+datafusion.functions.nullif(expr1: datafusion.expr.Expr, expr2: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns NULL if expr1 equals expr2; otherwise it returns expr1.

+

This can be used to perform the inverse operation of the COALESCE expression.

+
+ +
+
+datafusion.functions.nvl(x: datafusion.expr.Expr, y: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns x if x is not NULL. Otherwise returns y.

+
+ +
+
+datafusion.functions.octet_length(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the number of bytes of a string.

+
+ +
+
+datafusion.functions.order_by(expr: datafusion.expr.Expr, ascending: bool = True, nulls_first: bool = True) datafusion.expr.SortExpr
+

Creates a new sort expression.

+
+ +
+
+datafusion.functions.overlay(string: datafusion.expr.Expr, substring: datafusion.expr.Expr, start: datafusion.expr.Expr, length: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Replace a substring with a new substring.

+

Replace the substring of string that starts at the start’th character and +extends for length characters with new substring.

+
+ +
+
+datafusion.functions.percent_rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a percent_rank window function.

+

This window function is similar to rank() except that the returned values +are the percentage from 0.0 to 1.0 from first to last. Here is an example of a +dataframe with a window ordered by descending points and the associated percent +rank:

+
+--------+--------------+
+| points | percent_rank |
++--------+--------------+
+| 100    | 0.0          |
+| 100    | 0.0          |
+| 50     | 0.666667     |
+| 25     | 1.0          |
++--------+--------------+
+
+
+
+
Parameters:
+
    +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
percent_rank(order_by="points")
+
+
+
+ +
+
+datafusion.functions.pi() datafusion.expr.Expr
+

Returns an approximate value of π.

+
+ +
+
+datafusion.functions.pow(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns base raised to the power of exponent.

+

This is an alias for power().

+
+ +
+
+datafusion.functions.power(base: datafusion.expr.Expr, exponent: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns base raised to the power of exponent.

+
+ +
+
+datafusion.functions.radians(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts the argument from degrees to radians.

+
+ +
+
+datafusion.functions.random() datafusion.expr.Expr
+

Returns a random value in the range 0.0 <= x < 1.0.

+
+ +
+
+datafusion.functions.range(start: datafusion.expr.Expr, stop: datafusion.expr.Expr, step: datafusion.expr.Expr) datafusion.expr.Expr
+

Create a list of values in the range between start and stop.

+
+ +
+
+datafusion.functions.rank(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a rank window function.

+

Returns the rank based upon the window order. Consecutive equal values will receive +the same rank, but the next different value will not be consecutive but rather the +number of rows that precede it plus one. This is similar to Olympic medals. If two +people tie for gold, the next place is bronze. There would be no silver medal. Here +is an example of a dataframe with a window ordered by descending points and the +associated rank.

+

You should set order_by to produce meaningful results:

+
+--------+------+
+| points | rank |
++--------+------+
+| 100    | 1    |
+| 100    | 1    |
+| 50     | 3    |
+| 25     | 4    |
++--------+------+
+
+
+
+
Parameters:
+
    +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
rank(order_by="points")
+
+
+
+ +
+
+datafusion.functions.regexp_count(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, start: datafusion.expr.Expr | None = None, flags: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the number of matches in a string.

+

Optional start position (the first position is 1) to search for the regular +expression.

+
+ +
+
+datafusion.functions.regexp_instr(values: datafusion.expr.Expr, regex: datafusion.expr.Expr, start: datafusion.expr.Expr | None = None, n: datafusion.expr.Expr | None = None, flags: datafusion.expr.Expr | None = None, sub_expr: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Returns the position of a regular expression match in a string.

+

Searches values for the n-th occurrence of regex, starting at position +start (the first position is 1). Returns the starting or ending position based +on end_position. Use flags to control regex behavior and sub_expr to +return the position of a specific capture group instead of the entire match.

+
+ +
+
+datafusion.functions.regexp_like(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Find if any regular expression (regex) matches exist.

+

Tests a string using a regular expression returning true if at least one match, +false otherwise.

+
+ +
+
+datafusion.functions.regexp_match(string: datafusion.expr.Expr, regex: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Perform regular expression (regex) matching.

+

Returns an array with each element containing the leftmost-first match of the +corresponding index in regex to string in string.

+
+ +
+
+datafusion.functions.regexp_replace(string: datafusion.expr.Expr, pattern: datafusion.expr.Expr, replacement: datafusion.expr.Expr, flags: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Replaces substring(s) matching a PCRE-like regular expression.

+

The full list of supported features and syntax can be found at +<https://docs.rs/regex/latest/regex/#syntax>

+

Supported flags with the addition of ‘g’ can be found at +<https://docs.rs/regex/latest/regex/#grouping-and-flags>

+
+ +
+
+datafusion.functions.regr_avgx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the average of the independent variable x.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in the aggregation documentation, this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_avgy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the average of the dependent variable y.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_count(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Counts the number of rows in which both expressions are not null.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_intercept(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the intercept from the linear regression.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_r2(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the R-squared value from linear regression.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_slope(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the slope from linear regression.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_sxx(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sum of squares of the independent variable x.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_sxy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sum of products of pairs of numbers.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.regr_syy(y: datafusion.expr.Expr, x: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sum of squares of the dependent variable y.

+

This is a linear regression aggregate function. Only non-null pairs of the inputs +are evaluated.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • y – The linear regression dependent variable

  • +
  • x – The linear regression independent variable

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.repeat(string: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Repeats the string n times.

+
+ +
+
+datafusion.functions.replace(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces all occurrences of from_val with to_val in the string.

+
+ +
+
+datafusion.functions.reverse(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Reverse the string argument.

+
+ +
+
+datafusion.functions.right(string: datafusion.expr.Expr, n: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the last n characters in the string.

+
+ +
+
+datafusion.functions.round(value: datafusion.expr.Expr, decimal_places: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Round the argument to the nearest integer.

+

If the optional decimal_places is specified, round to the nearest number of +decimal places. You can specify a negative number of decimal places. For example +round(lit(125.2345), lit(-2)) would yield a value of 100.0.

+
+ +
+
+datafusion.functions.row_number(partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Create a row number window function.

+

Returns the row number of the window function.

+

Here is an example of the row_number on a simple DataFrame:

+
+--------+------------+
+| points | row number |
++--------+------------+
+| 100    | 1          |
+| 100    | 2          |
+| 50     | 3          |
+| 25     | 4          |
++--------+------------+
+
+
+
+
Parameters:
+
    +
  • partition_by – Expressions to partition the window frame on.

  • +
  • order_by – Set ordering within the window frame. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
row_number(order_by="points")
+
+
+
+ +
+
+datafusion.functions.rpad(string: datafusion.expr.Expr, count: datafusion.expr.Expr, characters: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Add right padding to a string.

+

Extends the string to length count by appending the characters given in characters (a space +by default). If the string is already longer than count then it is truncated.

+
+ +
+
+datafusion.functions.rtrim(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all characters, spaces by default, from the end of a string.

+
+ +
+
+datafusion.functions.sha224(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes the SHA-224 hash of a binary string.

+
+ +
+
+datafusion.functions.sha256(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes the SHA-256 hash of a binary string.

+
+ +
+
+datafusion.functions.sha384(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes the SHA-384 hash of a binary string.

+
+ +
+
+datafusion.functions.sha512(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Computes the SHA-512 hash of a binary string.

+
+ +
+
+datafusion.functions.signum(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the sign of the argument (-1, 0, +1).

+
+ +
+
+datafusion.functions.sin(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the sine of the argument.

+
+ +
+
+datafusion.functions.sinh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the hyperbolic sine of the argument.

+
+ +
+
+datafusion.functions.split_part(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, index: datafusion.expr.Expr) datafusion.expr.Expr
+

Split a string and return one part.

+

Splits a string based on a delimiter and picks out the desired field based +on the index.

+
+ +
+
+datafusion.functions.sqrt(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the square root of the argument.

+
+ +
+
+datafusion.functions.starts_with(string: datafusion.expr.Expr, prefix: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns true if string starts with prefix.

+
+ +
+
+datafusion.functions.stddev(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the standard deviation of the argument.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The value to compute the standard deviation of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.stddev_pop(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the population standard deviation of the argument.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The value to compute the population standard deviation of

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.stddev_samp(arg: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample standard deviation of the argument.

+

This is an alias for stddev().

+
+ +
+
+datafusion.functions.string_agg(expression: datafusion.expr.Expr, delimiter: str, filter: datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None) datafusion.expr.Expr
+

Concatenates the input strings.

+

This aggregate function will concatenate input strings, ignoring null values, and +separating them with the specified delimiter. Non-string values will be converted to +their string equivalents.

+

If using the builder functions described in ref:_aggregation this function ignores +the options distinct and null_treatment.

+
+
Parameters:
+
    +
  • expression – The string values to concatenate

  • +
  • delimiter – Text to place between each value of expression

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
  • order_by – Set the ordering of the expression to evaluate. Accepts +column names or expressions.

  • +
+
+
+

For example:

+
df.aggregate([], string_agg(col("a"), ",", order_by="b"))
+
+
+
+ +
+
+datafusion.functions.strpos(string: datafusion.expr.Expr, substring: datafusion.expr.Expr) datafusion.expr.Expr
+

Finds the position from where the substring matches the string.

+
+ +
+
+datafusion.functions.struct(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns a struct with the given arguments.

+
+ +
+
+datafusion.functions.substr(string: datafusion.expr.Expr, position: datafusion.expr.Expr) datafusion.expr.Expr
+

Substring from the position to the end.

+
+ +
+
+datafusion.functions.substr_index(string: datafusion.expr.Expr, delimiter: datafusion.expr.Expr, count: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns an indexed substring.

+

Returns the portion of string that precedes count occurrences of +delimiter.

+
+ +
+
+datafusion.functions.substring(string: datafusion.expr.Expr, position: datafusion.expr.Expr, length: datafusion.expr.Expr) datafusion.expr.Expr
+

Substring from the position with length characters.

+
+ +
+
+datafusion.functions.sum(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sum of a set of numbers.

+

This aggregate function expects a numeric expression.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The numeric values to sum

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.tan(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the tangent of the argument.

+
+ +
+
+datafusion.functions.tanh(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Returns the hyperbolic tangent of the argument.

+
+ +
+
+datafusion.functions.to_hex(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts an integer to a hexadecimal string.

+
+ +
+
+datafusion.functions.to_timestamp(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Timestamp in nanoseconds.

+

For usage of formatters see the strftime module of the Rust chrono package.

+

Documentation here: https://docs.rs/chrono/latest/chrono/format/strftime/index.html

+
+ +
+
+datafusion.functions.to_timestamp_micros(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Timestamp in microseconds.

+

See to_timestamp() for a description on how to use formatters.

+
+ +
+
+datafusion.functions.to_timestamp_millis(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Timestamp in milliseconds.

+

See to_timestamp() for a description on how to use formatters.

+
+ +
+
+datafusion.functions.to_timestamp_nanos(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Timestamp in nanoseconds.

+

See to_timestamp() for a description on how to use formatters.

+
+ +
+
+datafusion.functions.to_timestamp_seconds(arg: datafusion.expr.Expr, *formatters: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Timestamp in seconds.

+

See to_timestamp() for a description on how to use formatters.

+
+ +
+
+datafusion.functions.to_unixtime(string: datafusion.expr.Expr, *format_arguments: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string and optional formats to a Unixtime.

+
+ +
+
+datafusion.functions.translate(string: datafusion.expr.Expr, from_val: datafusion.expr.Expr, to_val: datafusion.expr.Expr) datafusion.expr.Expr
+

Replaces the characters in from_val with the counterpart in to_val.

+
+ +
+
+datafusion.functions.trim(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+datafusion.functions.trunc(num: datafusion.expr.Expr, precision: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Truncate the number toward zero with optional precision.

+
+ +
+
+datafusion.functions.upper(arg: datafusion.expr.Expr) datafusion.expr.Expr
+

Converts a string to uppercase.

+
+ +
+
+datafusion.functions.uuid() datafusion.expr.Expr
+

Returns uuid v4 as a string value.

+
+ +
+
+datafusion.functions.var(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample variance of the argument.

+

This is an alias for var_samp().

+
+ +
+
+datafusion.functions.var_pop(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the population variance of the argument.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The variable to compute the variance for

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.var_samp(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample variance of the argument.

+

If using the builder functions described in ref:_aggregation this function ignores +the options order_by, null_treatment, and distinct.

+
+
Parameters:
+
    +
  • expression – The variable to compute the variance for

  • +
  • filter – If provided, only compute against rows for which the filter is True

  • +
+
+
+
+ +
+
+datafusion.functions.var_sample(expression: datafusion.expr.Expr, filter: datafusion.expr.Expr | None = None) datafusion.expr.Expr
+

Computes the sample variance of the argument.

+

This is an alias for var_samp().

+
+ +
+
+datafusion.functions.when(when: datafusion.expr.Expr, then: datafusion.expr.Expr) datafusion.expr.CaseBuilder
+

Create a case expression that has no base expression.

+

Create a CaseBuilder whose first branch returns then when the +condition when evaluates to true. See CaseBuilder for +detailed usage.

+
+ +
+
+datafusion.functions.window(name: str, args: list[datafusion.expr.Expr], partition_by: list[datafusion.expr.Expr] | datafusion.expr.Expr | None = None, order_by: list[datafusion.expr.SortKey] | datafusion.expr.SortKey | None = None, window_frame: datafusion.expr.WindowFrame | None = None, filter: datafusion.expr.Expr | None = None, distinct: bool = False, ctx: datafusion.context.SessionContext | None = None) datafusion.expr.Expr
+

Creates a new Window function expression.

+

This interface will soon be deprecated. Instead of using this interface, +users should call the window functions directly. For example, to perform a +lag use:

+
df.select(functions.lag(col("a")).partition_by(col("b")).build())
+
+
+

The order_by parameter accepts column names or expressions, e.g.:

+
window("lag", [col("a")], order_by="ts")
+
+
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/html_formatter/index.html b/autoapi/datafusion/html_formatter/index.html new file mode 100644 index 000000000..d344fefca --- /dev/null +++ b/autoapi/datafusion/html_formatter/index.html @@ -0,0 +1,496 @@ + + + + + + + + datafusion.html_formatter — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.html_formatter

+

Deprecated module for dataframe formatting.

+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/index.html b/autoapi/datafusion/index.html new file mode 100644 index 000000000..49b2d73fe --- /dev/null +++ b/autoapi/datafusion/index.html @@ -0,0 +1,6418 @@ + + + + + + + + datafusion — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion

+

DataFusion python package.

+

This is a Python library that binds to the Apache Arrow in-memory query engine DataFusion. +See https://datafusion.apache.org/python for more information.

+
+

Submodules

+ +
+
+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + +

DFSchema

col

column

udaf

udf

udtf

udwf

+
+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Accumulator

Defines how an AggregateUDF accumulates values.

AggregateUDF

Class for performing aggregate user-defined functions (UDAF).

Catalog

DataFusion data catalog.

CsvReadOptions

Options for reading CSV files.

DataFrameWriteOptions

Writer options for DataFrame.

Database

See Schema.

ExecutionPlan

Represent nodes in the DataFusion Physical Plan.

Expr

Expression object.

InsertOp

Insert operation mode.

LogicalPlan

Logical Plan.

ParquetColumnOptions

Parquet options for individual columns.

ParquetWriterOptions

Advanced parquet writer options.

RecordBatch

This class is essentially a wrapper for pa.RecordBatch.

RecordBatchStream

This class represents a stream of record batches.

RuntimeEnvBuilder

Runtime configuration options.

SQLOptions

Options to be used when performing SQL queries.

ScalarUDF

Class for performing scalar user-defined functions (UDF).

SessionConfig

Session configuration options.

Table

A DataFusion table.

TableFunction

Class for performing user-defined table functions (UDTF).

WindowFrame

Defines a window frame for performing window operations.

WindowUDF

Class for performing window user-defined functions (UDF).

+
+
+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + +

configure_formatter(→ None)

Configure the global DataFrame HTML formatter.

lit(→ expr.Expr)

Create a literal expression.

literal(→ expr.Expr)

Create a literal expression.

read_avro(→ datafusion.dataframe.DataFrame)

Create a DataFrame for reading Avro data source.

read_csv(→ datafusion.dataframe.DataFrame)

Read a CSV data source.

read_json(→ datafusion.dataframe.DataFrame)

Read a line-delimited JSON data source.

read_parquet(→ datafusion.dataframe.DataFrame)

Read a Parquet source into a Dataframe.

+
+
+

Package Contents

+
+
+class datafusion.Accumulator
+

Defines how an AggregateUDF accumulates values.

+
+
+abstract evaluate() pyarrow.Scalar
+

Return the resultant value.

+

While this function template expects a PyArrow Scalar value return type, +you can return any value that can be converted into a Scalar. This +includes basic Python data types such as integers and strings. In +addition to primitive types, we currently support PyArrow, nanoarrow, +and arro3 objects. Other objects +that support the Arrow FFI standard will be given a “best attempt” at +conversion to scalar objects.

+
+ +
+
+abstract merge(states: list[pyarrow.Array]) None
+

Merge a set of states.

+
+ +
+
+abstract state() list[pyarrow.Scalar]
+

Return the current state.

+

While this function template expects PyArrow Scalar values as its return type, +you can return any value that can be converted into a Scalar. This +includes basic Python data types such as integers and strings. In +addition to primitive types, we currently support PyArrow, nanoarrow, +and arro3 objects. Other objects +that support the Arrow FFI standard will be given a “best attempt” at +conversion to scalar objects.

+
+ +
+
+abstract update(*values: pyarrow.Array) None
+

Evaluate an array of values and update state.

+
+ +
+ +
+
+class datafusion.AggregateUDF(name: str, accumulator: collections.abc.Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str)
+
+class datafusion.AggregateUDF(name: str, accumulator: AggregateUDFExportable, input_types: None = ..., return_type: None = ..., state_type: None = ..., volatility: None = ...)
+

Class for performing aggregate user-defined functions (UDAF).

+

Aggregate UDFs operate on a group of rows and return a single value. See +also ScalarUDF for operating on a row by row basis.

+

Instantiate a user-defined aggregate function (UDAF).

+

See udaf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDAF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Aggregate UDF.

+
+ +
+
+static from_pycapsule(func: AggregateUDFExportable | _typeshed.CapsuleType) AggregateUDF
+

Create an Aggregate UDF from AggregateUDF PyCapsule object.

+

This function will instantiate an Aggregate UDF that uses a DataFusion +AggregateUDF that is exported via the FFI bindings.

+
+ +
+
+static udaf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, AggregateUDF]
+
+static udaf(accum: collections.abc.Callable[[], Accumulator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) AggregateUDF
+
+static udaf(accum: AggregateUDFExportable) AggregateUDF
+
+static udaf(accum: _typeshed.CapsuleType) AggregateUDF
+

Create a new User-Defined Aggregate Function (UDAF).

+

This class allows you to define an aggregate function that can be used in +data aggregation or window function calls.

+
+
Usage:
    +
  • As a function: udaf(accum, input_types, return_type, state_type, volatility, name).

  • +
  • As a decorator: @udaf(input_types, return_type, state_type, volatility, name). +When using udaf as a decorator, do not pass accum explicitly.

  • +
+
+
+

Function example:

+

If your Accumulator can be instantiated with no arguments, you +can simply pass its type as accum. If you need to pass additional +arguments to its constructor, you can define a lambda or a factory method. +During runtime the Accumulator will be constructed for every +instance in which this UDAF is used. The following examples are all valid:

+
import pyarrow as pa
+import pyarrow.compute as pc
+
+class Summarize(Accumulator):
+    def __init__(self, bias: float = 0.0):
+        self._sum = pa.scalar(bias)
+
+    def state(self) -> list[pa.Scalar]:
+        return [self._sum]
+
+    def update(self, values: pa.Array) -> None:
+        self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
+
+    def merge(self, states: list[pa.Array]) -> None:
+        self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
+
+    def evaluate(self) -> pa.Scalar:
+        return self._sum
+
+def sum_bias_10() -> Summarize:
+    return Summarize(10.0)
+
+udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
+    "immutable")
+udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
+    "immutable")
+udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
+    [pa.float64()], "immutable")
+
+
+

Decorator example:

+
@udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
+def udf4() -> Summarize:
+    return Summarize(10.0)
+
+
+
+
Parameters:
+
    +
  • accum – The accumulator python function. Only needed when calling as a +function. Skip this argument when using udaf as a decorator. +If you have a Rust backed AggregateUDF within a PyCapsule, you can +pass this parameter and ignore the rest. They will be determined +directly from the underlying function. See the online documentation +for more information.

  • +
  • input_types – The data types of the arguments to accum.

  • +
  • return_type – The data type of the return value.

  • +
  • state_type – The data types of the intermediate accumulation.

  • +
  • volatility – See Volatility for allowed values.

  • +
  • name – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined aggregate function, which can be used in either data +aggregation or window function calls.

+
+
+
+ +
+
+_udaf
+
+ +
+ +
+
+class datafusion.Catalog(catalog: datafusion._internal.catalog.RawCatalog)
+

DataFusion data catalog.

+

This constructor is not typically called by the end user.

+
+
+__repr__() str
+

Print a string representation of the catalog.

+
+ +
+
+database(name: str = 'public') Schema
+

Returns the database with the given name from this catalog.

+
+ +
+
+deregister_schema(name: str, cascade: bool = True) Schema | None
+

Deregister a schema from this catalog.

+
+ +
+
+static memory_catalog(ctx: datafusion.SessionContext | None = None) Catalog
+

Create an in-memory catalog provider.

+
+ +
+
+names() set[str]
+

This is an alias for schema_names.

+
+ +
+
+register_schema(name: str, schema: Schema | SchemaProvider | SchemaProviderExportable) Schema | None
+

Register a schema with this catalog.

+
+ +
+
+schema(name: str = 'public') Schema
+

Returns the schema with the given name from this catalog.

+
+ +
+
+schema_names() set[str]
+

Returns the set of schema names in this catalog.

+
+ +
+
+catalog
+
+ +
+ +
+
+class datafusion.CsvReadOptions(*, has_header: bool = True, delimiter: str = ',', quote: str = '"', terminator: str | None = None, escape: str | None = None, comment: str | None = None, newlines_in_values: bool = False, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, pyarrow.DataType]] | None = None, file_compression_type: str = '', file_sort_order: list[list[datafusion.expr.SortExpr]] | None = None, null_regex: str | None = None, truncated_rows: bool = False)
+

Options for reading CSV files.

+

This class provides a builder pattern for configuring CSV reading options. +All methods starting with with_ return self to allow method chaining.

+

Initialize CsvReadOptions.

+
+
Parameters:
+
    +
  • has_header – Does the CSV file have a header row? If schema inference +is run on a file with no headers, default column names are created.

  • +
  • delimiter – Column delimiter character. Must be a single ASCII character.

  • +
  • quote – Quote character for fields containing delimiters or newlines. +Must be a single ASCII character.

  • +
  • terminator – Optional line terminator character. If None, uses CRLF. +Must be a single ASCII character.

  • +
  • escape – Optional escape character for quotes. Must be a single ASCII +character.

  • +
  • comment – If specified, lines beginning with this character are ignored. +Must be a single ASCII character.

  • +
  • newlines_in_values – Whether newlines in quoted values are supported. +Parsing newlines in quoted values may be affected by execution +behavior such as parallel file scanning. Setting this to True +ensures that newlines in values are parsed successfully, which may +reduce performance.

  • +
  • schema – Optional PyArrow schema representing the CSV files. If None, +the CSV reader will try to infer it based on data in the file.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV files +for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns as a list of tuples of +(column_name, data_type).

  • +
  • file_compression_type – File compression type. Supported values are +"gzip", "bz2", "xz", "zstd", or empty string for +uncompressed.

  • +
  • file_sort_order – Optional sort order of the files as a list of sort +expressions per file.

  • +
  • null_regex – Optional regex pattern to match null values in the CSV.

  • +
  • truncated_rows – Whether to allow truncated rows when parsing. By default +this is False and will error if the CSV rows have different +lengths. When set to True, it will allow records with fewer than +the expected number of columns and fill the missing columns with +nulls. If the record’s schema is not nullable, it will still return +an error.

  • +
+
+
+
+
+to_inner() datafusion._internal.options.CsvReadOptions
+

Convert this object into the underlying Rust structure.

+

This is intended for internal use only.

+
+ +
+
+with_comment(comment: str | None) CsvReadOptions
+

Configure the comment character.

+
+ +
+
+with_delimiter(delimiter: str) CsvReadOptions
+

Configure the column delimiter.

+
+ +
+
+with_escape(escape: str | None) CsvReadOptions
+

Configure the escape character.

+
+ +
+
+with_file_compression_type(file_compression_type: str) CsvReadOptions
+

Configure file compression type.

+
+ +
+
+with_file_extension(file_extension: str) CsvReadOptions
+

Configure the file extension filter.

+
+ +
+
+with_file_sort_order(file_sort_order: list[list[datafusion.expr.SortExpr]]) CsvReadOptions
+

Configure file sort order.

+
+ +
+
+with_has_header(has_header: bool) CsvReadOptions
+

Configure whether the CSV has a header row.

+
+ +
+
+with_newlines_in_values(newlines_in_values: bool) CsvReadOptions
+

Configure whether newlines in values are supported.

+
+ +
+
+with_null_regex(null_regex: str | None) CsvReadOptions
+

Configure null value regex pattern.

+
+ +
+
+with_quote(quote: str) CsvReadOptions
+

Configure the quote character.

+
+ +
+
+with_schema(schema: pyarrow.Schema | None) CsvReadOptions
+

Configure the schema.

+
+ +
+
+with_schema_infer_max_records(schema_infer_max_records: int) CsvReadOptions
+

Configure maximum records for schema inference.

+
+ +
+
+with_table_partition_cols(table_partition_cols: list[tuple[str, pyarrow.DataType]]) CsvReadOptions
+

Configure table partition columns.

+
+ +
+
+with_terminator(terminator: str | None) CsvReadOptions
+

Configure the line terminator character.

+
+ +
+
+with_truncated_rows(truncated_rows: bool) CsvReadOptions
+

Configure whether to allow truncated rows.

+
+ +
+
+comment = None
+
+ +
+
+delimiter = ','
+
+ +
+
+escape = None
+
+ +
+
+file_compression_type = ''
+
+ +
+
+file_extension = '.csv'
+
+ +
+
+file_sort_order = []
+
+ +
+
+has_header = True
+
+ +
+
+newlines_in_values = False
+
+ +
+
+null_regex = None
+
+ +
+
+quote = '"'
+
+ +
+
+schema = None
+
+ +
+
+schema_infer_max_records = 1000
+
+ +
+
+table_partition_cols = []
+
+ +
+
+terminator = None
+
+ +
+
+truncated_rows = False
+
+ +
+ +
+
+class datafusion.DataFrameWriteOptions(insert_operation: InsertOp | None = None, single_file_output: bool = False, partition_by: str | collections.abc.Sequence[str] | None = None, sort_by: datafusion.expr.Expr | datafusion.expr.SortExpr | collections.abc.Sequence[datafusion.expr.Expr] | collections.abc.Sequence[datafusion.expr.SortExpr] | None = None)
+

Writer options for DataFrame.

+

There is no guarantee the table provider supports all writer options. +See the individual implementation and documentation for details.

+

Instantiate writer options for DataFrame.

+
+
+_raw_write_options
+
+ +
+ +
+
+class datafusion.Database(schema: datafusion._internal.catalog.RawSchema)
+

Bases: Schema

+

See Schema.

+

This constructor is not typically called by the end user.

+
+ +
+
+class datafusion.ExecutionPlan(plan: datafusion._internal.ExecutionPlan)
+

Represent nodes in the DataFusion Physical Plan.

+

This constructor should not be called by the end user.

+
+
+__repr__() str
+

Print a string representation of the physical plan.

+
+ +
+
+children() list[ExecutionPlan]
+

Get a list of children ExecutionPlan that act as inputs to this plan.

+

The returned list will be empty for leaf nodes such as scans, will contain a +single value for unary nodes, or two values for binary nodes (such as joins).

+
+ +
+
+display() str
+

Print the physical plan.

+
+ +
+
+display_indent() str
+

Print an indented form of the physical plan.

+
+ +
+
+static from_proto(ctx: datafusion.context.SessionContext, data: bytes) ExecutionPlan
+

Create an ExecutionPlan from protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+to_proto() bytes
+

Convert an ExecutionPlan into protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+_raw_plan
+
+ +
+
+property partition_count: int
+

Returns the number of partitions in the physical plan.

+
+ +
+ +
+
+class datafusion.Expr(expr: datafusion._internal.expr.RawExpr)
+

Expression object.

+

Expressions are one of the core concepts in DataFusion. See +Expressions in the online documentation for more information.

+

This constructor should not be called by the end user.

+
+
+__add__(rhs: Any) Expr
+

Addition operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__and__(rhs: Expr) Expr
+

Logical AND.

+
+ +
+
+__eq__(rhs: object) Expr
+

Equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__ge__(rhs: Any) Expr
+

Greater than or equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__getitem__(key: str | int) Expr
+

Retrieve sub-object.

+

If key is a string, returns the subfield of the struct. +If key is an integer, retrieves the element in the array. Note that the +element index begins at 0, unlike +array_element() which begins at 1. +If key is a slice, returns an array that contains a slice of the +original array. Similar to integer indexing, this follows Python convention +where the index begins at 0 unlike +array_slice() which begins at 1.

+
+ +
+
+__gt__(rhs: Any) Expr
+

Greater than.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__invert__() Expr
+

Binary not (~).

+
+ +
+
+__le__(rhs: Any) Expr
+

Less than or equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__lt__(rhs: Any) Expr
+

Less than.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__mod__(rhs: Any) Expr
+

Modulo operator (%).

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__mul__(rhs: Any) Expr
+

Multiplication operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__ne__(rhs: object) Expr
+

Not equal to.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__or__(rhs: Expr) Expr
+

Logical OR.

+
+ +
+
+__repr__() str
+

Generate a string representation of this expression.

+
+ +
+
+__richcmp__(other: Expr, op: int) Expr
+

Comparison operator.

+
+ +
+
+__sub__(rhs: Any) Expr
+

Subtraction operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+__truediv__(rhs: Any) Expr
+

Division operator.

+

Accepts either an expression or any valid PyArrow scalar literal value.

+
+ +
+
+abs() Expr
+

Return the absolute value of a given number.

+
+

Returns:

+
+
Expr

A new expression representing the absolute value of the input expression.

+
+
+
+
+ +
+
+acos() Expr
+

Returns the arc cosine or inverse cosine of a number.

+
+

Returns:

+
+
Expr

A new expression representing the arc cosine of the input expression.

+
+
+
+
+ +
+
+acosh() Expr
+

Returns inverse hyperbolic cosine.

+
+ +
+
+alias(name: str, metadata: dict[str, str] | None = None) Expr
+

Assign a name to the expression.

+
+
Parameters:
+
    +
  • name – The name to assign to the expression.

  • +
  • metadata – Optional metadata to attach to the expression.

  • +
+
+
Returns:
+

A new expression with the assigned name.

+
+
+
+ +
+
+array_dims() Expr
+

Returns an array of the array’s dimensions.

+
+ +
+
+array_distinct() Expr
+

Returns distinct values from the array after removing duplicates.

+
+ +
+
+array_empty() Expr
+

Returns a boolean indicating whether the array is empty.

+
+ +
+
+array_length() Expr
+

Returns the length of the array.

+
+ +
+
+array_ndims() Expr
+

Returns the number of dimensions of the array.

+
+ +
+
+array_pop_back() Expr
+

Returns the array without the last element.

+
+ +
+
+array_pop_front() Expr
+

Returns the array without the first element.

+
+ +
+
+arrow_typeof() Expr
+

Returns the Arrow type of the expression.

+
+ +
+
+ascii() Expr
+

Returns the numeric code of the first character of the argument.

+
+ +
+
+asin() Expr
+

Returns the arc sine or inverse sine of a number.

+
+ +
+
+asinh() Expr
+

Returns inverse hyperbolic sine.

+
+ +
+
+atan() Expr
+

Returns inverse tangent of a number.

+
+ +
+
+atanh() Expr
+

Returns inverse hyperbolic tangent.

+
+ +
+
+between(low: Any, high: Any, negated: bool = False) Expr
+

Returns True if this expression is between a given range.

+
+
Parameters:
+
    +
  • low – lower bound of the range (inclusive).

  • +
  • high – higher bound of the range (inclusive).

  • +
  • negated – negates whether the expression is between a given range

  • +
+
+
+
+ +
+
+bit_length() Expr
+

Returns the number of bits in the string argument.

+
+ +
+
+btrim() Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+canonical_name() str
+

Returns a complete string representation of this expression.

+
+ +
+
+cardinality() Expr
+

Returns the total number of elements in the array.

+
+ +
+
+cast(to: pyarrow.DataType[Any] | type) Expr
+

Cast to a new data type.

+
+ +
+
+cbrt() Expr
+

Returns the cube root of a number.

+
+ +
+
+ceil() Expr
+

Returns the nearest integer greater than or equal to argument.

+
+ +
+
+char_length() Expr
+

The number of characters in the string.

+
+ +
+
+character_length() Expr
+

Returns the number of characters in the argument.

+
+ +
+
+chr() Expr
+

Converts the Unicode code point to a UTF8 character.

+
+ +
+
+static column(value: str) Expr
+

Creates a new expression representing a column.

+
+ +
+
+column_name(plan: datafusion.plan.LogicalPlan) str
+

Compute the output column name based on the provided logical plan.

+
+ +
+
+cos() Expr
+

Returns the cosine of the argument.

+
+ +
+
+cosh() Expr
+

Returns the hyperbolic cosine of the argument.

+
+ +
+
+cot() Expr
+

Returns the cotangent of the argument.

+
+ +
+
+degrees() Expr
+

Converts the argument from radians to degrees.

+
+ +
+
+display_name() str
+

Returns the name of this expression as it should appear in a schema.

+

This name will not include any CAST expressions.

+
+ +
+
+distinct() ExprFuncBuilder
+

Only evaluate distinct values for an aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+empty() Expr
+

This is an alias for array_empty().

+
+ +
+
+exp() Expr
+

Returns the exponential of the argument.

+
+ +
+
+factorial() Expr
+

Returns the factorial of the argument.

+
+ +
+
+fill_nan(value: Any | Expr | None = None) Expr
+

Fill NaN values with a provided value.

+
+ +
+
+fill_null(value: Any | Expr | None = None) Expr
+

Fill NULL values with a provided value.

+
+ +
+
+filter(filter: Expr) ExprFuncBuilder
+

Filter an aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+flatten() Expr
+

Flattens an array of arrays into a single array.

+
+ +
+
+floor() Expr
+

Returns the nearest integer less than or equal to the argument.

+
+ +
+
+from_unixtime() Expr
+

Converts an integer to RFC3339 timestamp format string.

+
+ +
+
+initcap() Expr
+

Set the initial letter of each word to capital.

+

Converts the first letter of each word in string to uppercase and the +remaining characters to lowercase.

+
+ +
+
+is_not_null() Expr
+

Returns True if this expression is not null.

+
+ +
+
+is_null() Expr
+

Returns True if this expression is null.

+
+ +
+
+isnan() Expr
+

Returns true if a given number is +NaN or -NaN, otherwise returns false.

+
+ +
+
+iszero() Expr
+

Returns true if a given number is +0.0 or -0.0, otherwise returns false.

+
+ +
+
+length() Expr
+

The number of characters in the string.

+
+ +
+
+list_dims() Expr
+

Returns an array of the array’s dimensions.

+

This is an alias for array_dims().

+
+ +
+
+list_distinct() Expr
+

Returns distinct values from the array after removing duplicates.

+

This is an alias for array_distinct().

+
+ +
+
+list_length() Expr
+

Returns the length of the array.

+

This is an alias for array_length().

+
+ +
+
+list_ndims() Expr
+

Returns the number of dimensions of the array.

+

This is an alias for array_ndims().

+
+ +
+
+static literal(value: Any) Expr
+

Creates a new expression representing a scalar value.

+

value must be a valid PyArrow scalar value or easily castable to one.

+
+ +
+
+static literal_with_metadata(value: Any, metadata: dict[str, str]) Expr
+

Creates a new expression representing a scalar value with metadata.

+
+
Parameters:
+
    +
  • value – A valid PyArrow scalar value or easily castable to one.

  • +
  • metadata – Metadata to attach to the expression.

  • +
+
+
+
+ +
+
+ln() Expr
+

Returns the natural logarithm (base e) of the argument.

+
+ +
+
+log10() Expr
+

Base 10 logarithm of the argument.

+
+ +
+
+log2() Expr
+

Base 2 logarithm of the argument.

+
+ +
+
+lower() Expr
+

Converts a string to lowercase.

+
+ +
+
+ltrim() Expr
+

Removes all characters, spaces by default, from the beginning of a string.

+
+ +
+
+md5() Expr
+

Computes an MD5 128-bit checksum for a string expression.

+
+ +
+
+null_treatment(null_treatment: datafusion.common.NullTreatment) ExprFuncBuilder
+

Set the treatment for null values for a window or aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+octet_length() Expr
+

Returns the number of bytes of a string.

+
+ +
+
+order_by(*exprs: Expr | SortExpr) ExprFuncBuilder
+

Set the ordering for a window or aggregate function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+over(window: Window) Expr
+

Turn an aggregate function into a window function.

+

This function turns any aggregate function into a window function. With the +exception of partition_by, how each of the parameters is used is determined +by the underlying aggregate function.

+
+
Parameters:
+

window – Window definition

+
+
+
+ +
+
+partition_by(*partition_by: Expr) ExprFuncBuilder
+

Set the partitioning for a window function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+python_value() Any
+

Extracts the Expr value into Any.

+

This is only valid for literal expressions.

+
+
Returns:
+

Python object representing literal value of the expression.

+
+
+
+ +
+
+radians() Expr
+

Converts the argument from degrees to radians.

+
+ +
+
+reverse() Expr
+

Reverse the string argument.

+
+ +
+
+rex_call_operands() list[Expr]
+

Return the operands of the expression based on its variant type.

+

Row expressions, Rex(s), operate on the concept of operands. Different +variants of Expressions, Expr(s), store those operands in different +data structures. This function examines the Expr variant and returns +the operands to the calling logic.

+
+ +
+
+rex_call_operator() str
+

Extracts the operator associated with a row expression type call.

+
+ +
+
+rex_type() datafusion.common.RexType
+

Return the Rex Type of this expression.

+

A Rex (Row Expression) specifies a single row of data. That specification +could include user defined functions or types. RexType identifies the +row as one of the possible valid RexType.

+
+ +
+
+rtrim() Expr
+

Removes all characters, spaces by default, from the end of a string.

+
+ +
+
+schema_name() str
+

Returns the name of this expression as it should appear in a schema.

+

This name will not include any CAST expressions.

+
+ +
+
+sha224() Expr
+

Computes the SHA-224 hash of a binary string.

+
+ +
+
+sha256() Expr
+

Computes the SHA-256 hash of a binary string.

+
+ +
+
+sha384() Expr
+

Computes the SHA-384 hash of a binary string.

+
+ +
+
+sha512() Expr
+

Computes the SHA-512 hash of a binary string.

+
+ +
+
+signum() Expr
+

Returns the sign of the argument (-1, 0, +1).

+
+ +
+
+sin() Expr
+

Returns the sine of the argument.

+
+ +
+
+sinh() Expr
+

Returns the hyperbolic sine of the argument.

+
+ +
+
+sort(ascending: bool = True, nulls_first: bool = True) SortExpr
+

Creates a sort Expr from an existing Expr.

+
+
Parameters:
+
    +
  • ascending – If true, sort in ascending order.

  • +
  • nulls_first – Return null values first.

  • +
+
+
+
+ +
+
+sqrt() Expr
+

Returns the square root of the argument.

+
+ +
+
+static string_literal(value: str) Expr
+

Creates a new expression representing a UTF8 literal value.

+

It is different from literal because it is pa.string() instead of +pa.string_view()

+

This is needed for cases where DataFusion is expecting a UTF8 instead of +UTF8View literal, like in: +https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179

+
+ +
+
+tan() Expr
+

Returns the tangent of the argument.

+
+ +
+
+tanh() Expr
+

Returns the hyperbolic tangent of the argument.

+
+ +
+
+to_hex() Expr
+

Converts an integer to a hexadecimal string.

+
+ +
+
+to_variant() Any
+

Convert this expression into a python object if possible.

+
+ +
+
+trim() Expr
+

Removes all characters, spaces by default, from both sides of a string.

+
+ +
+
+types() datafusion.common.DataTypeMap
+

Return the DataTypeMap.

+
+
Returns:
+

DataTypeMap which represents the PythonType, Arrow DataType, and +SqlType Enum which this expression represents.

+
+
+
+ +
+
+upper() Expr
+

Converts a string to uppercase.

+
+ +
+
+variant_name() str
+

Returns the name of the Expr variant.

+

Ex: IsNotNull, Literal, BinaryExpr, etc

+
+ +
+
+window_frame(window_frame: WindowFrame) ExprFuncBuilder
+

Set the frame for a window function.

+

This function will create an ExprFuncBuilder that can be used to +set parameters for either window or aggregate functions. If used on any other +type of expression, an error will be generated when build() is called.

+
+ +
+
+__radd__
+
+ +
+
+__rand__
+
+ +
+
+__rmod__
+
+ +
+
+__rmul__
+
+ +
+
+__ror__
+
+ +
+
+__rsub__
+
+ +
+
+__rtruediv__
+
+ +
+
+_to_pyarrow_types: ClassVar[dict[type, pyarrow.DataType]]
+
+ +
+
+expr
+
+ +
+ +
+
+class datafusion.InsertOp
+

Bases: enum.Enum

+

Insert operation mode.

+

These modes are used by the table writing feature to define how record +batches should be written to a table.

+
+
+APPEND
+

Appends new rows to the existing table without modifying any existing rows.

+
+ +
+
+OVERWRITE
+

Overwrites all existing rows in the table with the new rows.

+
+ +
+
+REPLACE
+

Replace existing rows that collide with the inserted rows.

+

Replacement is typically based on a unique key or primary key.

+
+ +
+ +
+
+class datafusion.LogicalPlan(plan: datafusion._internal.LogicalPlan)
+

Logical Plan.

+

A LogicalPlan is a node in a tree of relational operators (such as +Projection or Filter).

+

Represents transforming an input relation (table) to an output relation +(table) with a potentially different schema. Plans form a dataflow tree +where data flows from leaves up to the root to produce the query result.

+

A LogicalPlan can be created by the SQL query planner, the DataFrame API, +or programmatically (for example custom query languages).

+

This constructor should not be called by the end user.

+
+
+__eq__(other: LogicalPlan) bool
+

Test equality.

+
+ +
+
+__repr__() str
+

Generate a printable representation of the plan.

+
+ +
+
+display() str
+

Print the logical plan.

+
+ +
+
+display_graphviz() str
+

Print the graph visualization of the logical plan.

+

Returns a formattable structure that produces lines meant for graphical display +using the DOT language. This format can be visualized using software from +[graphviz](https://graphviz.org/).

+
+ +
+
+display_indent() str
+

Print an indented form of the logical plan.

+
+ +
+
+display_indent_schema() str
+

Print an indented form of the schema for the logical plan.

+
+ +
+
+static from_proto(ctx: datafusion.context.SessionContext, data: bytes) LogicalPlan
+

Create a LogicalPlan from protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+inputs() list[LogicalPlan]
+

Returns the list of inputs to the logical plan.

+
+ +
+
+to_proto() bytes
+

Convert a LogicalPlan to protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+to_variant() Any
+

Convert the logical plan into its specific variant.

+
+ +
+
+_raw_plan
+
+ +
+ +
+
+class datafusion.ParquetColumnOptions(encoding: str | None = None, dictionary_enabled: bool | None = None, compression: str | None = None, statistics_enabled: str | None = None, bloom_filter_enabled: bool | None = None, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None)
+

Parquet options for individual columns.

+

Contains the available options that can be applied for an individual Parquet column, +replacing the global options in ParquetWriterOptions.

+

Initialize the ParquetColumnOptions.

+
+
Parameters:
+
    +
  • encoding – Sets encoding for the column path. Valid values are: plain, +plain_dictionary, rle, bit_packed, delta_binary_packed, +delta_length_byte_array, delta_byte_array, rle_dictionary, +and byte_stream_split. These values are not case-sensitive. If +None, uses the default parquet options

  • +
  • dictionary_enabled – Sets if dictionary encoding is enabled for the column +path. If None, uses the default parquet options

  • +
  • compression – Sets default parquet compression codec for the column path. +Valid values are uncompressed, snappy, gzip(level), lzo, +brotli(level), lz4, zstd(level), and lz4_raw. These +values are not case-sensitive. If None, uses the default parquet +options.

  • +
  • statistics_enabled – Sets if statistics are enabled for the column. Valid +values are: none, chunk, and page. These values are not case +sensitive. If None, uses the default parquet options.

  • +
  • bloom_filter_enabled – Sets if bloom filter is enabled for the column path. +If None, uses the default parquet options.

  • +
  • bloom_filter_fpp – Sets bloom filter false positive probability for the +column path. If None, uses the default parquet options.

  • +
  • bloom_filter_ndv – Sets bloom filter number of distinct values. If None, +uses the default parquet options.

  • +
+
+
+
+
+bloom_filter_enabled = None
+
+ +
+
+bloom_filter_fpp = None
+
+ +
+
+bloom_filter_ndv = None
+
+ +
+
+compression = None
+
+ +
+
+dictionary_enabled = None
+
+ +
+
+encoding = None
+
+ +
+
+statistics_enabled = None
+
+ +
+ +
+
+class datafusion.ParquetWriterOptions(data_pagesize_limit: int = 1024 * 1024, write_batch_size: int = 1024, writer_version: str = '1.0', skip_arrow_metadata: bool = False, compression: str | None = 'zstd(3)', compression_level: int | None = None, dictionary_enabled: bool | None = True, dictionary_page_size_limit: int = 1024 * 1024, statistics_enabled: str | None = 'page', max_row_group_size: int = 1024 * 1024, created_by: str = 'datafusion-python', column_index_truncate_length: int | None = 64, statistics_truncate_length: int | None = None, data_page_row_count_limit: int = 20000, encoding: str | None = None, bloom_filter_on_write: bool = False, bloom_filter_fpp: float | None = None, bloom_filter_ndv: int | None = None, allow_single_file_parallelism: bool = True, maximum_parallel_row_group_writers: int = 1, maximum_buffered_record_batches_per_stream: int = 2, column_specific_options: dict[str, ParquetColumnOptions] | None = None)
+

Advanced parquet writer options.

+

Allows setting the writer options that apply to the entire file. Some options can +also be set on a column by column basis, with the field column_specific_options +(see ParquetColumnOptions).

+

Initialize the ParquetWriterOptions.

+
+
Parameters:
+
    +
  • data_pagesize_limit – Sets best effort maximum size of data page in bytes.

  • +
  • write_batch_size – Sets write_batch_size in bytes.

  • +
  • writer_version – Sets parquet writer version. Valid values are 1.0 and +2.0.

  • +
  • skip_arrow_metadata – Skip encoding the embedded arrow metadata in the +KV_meta.

  • +
  • compression

    Compression type to use. Default is zstd(3). +Available compression types are

    +
      +
    • uncompressed: No compression.

    • +
    • snappy: Snappy compression.

    • +
    • gzip(n): Gzip compression with level n.

    • +
    • brotli(n): Brotli compression with level n.

    • +
    • lz4: LZ4 compression.

    • +
    • lz4_raw: LZ4_RAW compression.

    • +
    • zstd(n): Zstandard compression with level n.

    • +
    +

  • +
  • compression_level – Compression level to set.

  • +
  • dictionary_enabled – Sets if dictionary encoding is enabled. If None, +uses the default parquet writer setting.

  • +
  • dictionary_page_size_limit – Sets best effort maximum dictionary page size, +in bytes.

  • +
  • statistics_enabled – Sets if statistics are enabled for any column. Valid +values are none, chunk, and page. If None, uses the +default parquet writer setting.

  • +
  • max_row_group_size – Target maximum number of rows in each row group +(defaults to 1M rows). Writing larger row groups requires more memory +to write, but can get better compression and be faster to read.

  • +
  • created_by – Sets “created by” property.

  • +
  • column_index_truncate_length – Sets column index truncate length.

  • +
  • statistics_truncate_length – Sets statistics truncate length. If None, +uses the default parquet writer setting.

  • +
  • data_page_row_count_limit – Sets best effort maximum number of rows in a data +page.

  • +
  • encoding – Sets default encoding for any column. Valid values are plain, +plain_dictionary, rle, bit_packed, delta_binary_packed, +delta_length_byte_array, delta_byte_array, rle_dictionary, +and byte_stream_split. If None, uses the default parquet writer +setting.

  • +
  • bloom_filter_on_write – Write bloom filters for all columns when creating +parquet files.

  • +
  • bloom_filter_fpp – Sets bloom filter false positive probability. If None, +uses the default parquet writer setting

  • +
  • bloom_filter_ndv – Sets bloom filter number of distinct values. If None, +uses the default parquet writer setting.

  • +
  • allow_single_file_parallelism – Controls whether DataFusion will attempt to +speed up writing parquet files by serializing them in parallel. Each +column in each row group in each output file are serialized in parallel +leveraging a maximum possible core count of +n_files * n_row_groups * n_columns.

  • +
  • maximum_parallel_row_group_writers – By default parallel parquet writer is +tuned for minimum memory usage in a streaming execution plan. You may +see a performance benefit when writing large parquet files by increasing +maximum_parallel_row_group_writers and +maximum_buffered_record_batches_per_stream if your system has idle +cores and can tolerate additional memory usage. Boosting these values is +likely worthwhile when writing out already in-memory data, such as from +a cached data frame.

  • +
  • maximum_buffered_record_batches_per_stream – See +maximum_parallel_row_group_writers.

  • +
  • column_specific_options – Overrides options for specific columns. If a column +is not a part of this dictionary, it will use the parameters provided +here.

  • +
+
+
+
+
+allow_single_file_parallelism = True
+
+ +
+
+bloom_filter_fpp = None
+
+ +
+
+bloom_filter_ndv = None
+
+ +
+
+bloom_filter_on_write = False
+
+ +
+
+column_index_truncate_length = 64
+
+ +
+
+column_specific_options = None
+
+ +
+
+created_by = 'datafusion-python'
+
+ +
+
+data_page_row_count_limit = 20000
+
+ +
+
+data_pagesize_limit = 1048576
+
+ +
+
+dictionary_enabled = True
+
+ +
+
+dictionary_page_size_limit = 1048576
+
+ +
+
+encoding = None
+
+ +
+
+max_row_group_size = 1048576
+
+ +
+
+maximum_buffered_record_batches_per_stream = 2
+
+ +
+
+maximum_parallel_row_group_writers = 1
+
+ +
+
+skip_arrow_metadata = False
+
+ +
+
+statistics_enabled = 'page'
+
+ +
+
+statistics_truncate_length = None
+
+ +
+
+write_batch_size = 1024
+
+ +
+
+writer_version = '1.0'
+
+ +
+ +
+
+class datafusion.RecordBatch(record_batch: datafusion._internal.RecordBatch)
+

This class is essentially a wrapper for pa.RecordBatch.

+

This constructor is generally not called by the end user.

+

See the RecordBatchStream iterator for generating this class.

+
+
+__arrow_c_array__(requested_schema: object | None = None) tuple[object, object]
+

Export the record batch via the Arrow C Data Interface.

+

This allows zero-copy interchange with libraries that support the +Arrow PyCapsule interface.

+
+
Parameters:
+

requested_schema – Attempt to provide the record batch using this +schema. Only straightforward projections such as column +selection or reordering are applied.

+
+
Returns:
+

Two Arrow PyCapsule objects representing the ArrowArray and +ArrowSchema.

+
+
+
+ +
+
+to_pyarrow() pyarrow.RecordBatch
+

Convert to pa.RecordBatch.

+
+ +
+
+record_batch
+
+ +
+ +
+
+class datafusion.RecordBatchStream(record_batch_stream: datafusion._internal.RecordBatchStream)
+

This class represents a stream of record batches.

+

These are typically the result of an +execute_stream() operation.

+

This constructor is typically not called by the end user.

+
+
+__aiter__() typing_extensions.Self
+

Return an asynchronous iterator over record batches.

+
+ +
+
+async __anext__() RecordBatch
+

Return the next RecordBatch in the stream asynchronously.

+
+ +
+
+__iter__() typing_extensions.Self
+

Return an iterator over record batches.

+
+ +
+
+__next__() RecordBatch
+

Return the next RecordBatch in the stream.

+
+ +
+
+next() RecordBatch
+

See __next__() for the iterator function.

+
+ +
+
+rbs
+
+ +
+ +
+
+class datafusion.RuntimeEnvBuilder
+

Runtime configuration options.

+

Create a new RuntimeEnvBuilder with default values.

+
+
+with_disk_manager_disabled() RuntimeEnvBuilder
+

Disable the disk manager, attempts to create temporary files will error.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_disk_manager_os() RuntimeEnvBuilder
+

Use the operating system’s temporary directory for disk manager.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_disk_manager_specified(*paths: str | pathlib.Path) RuntimeEnvBuilder
+

Use the specified paths for the disk manager’s temporary files.

+
+
Parameters:
+

paths – Paths to use for the disk manager’s temporary files.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+with_fair_spill_pool(size: int) RuntimeEnvBuilder
+

Use a fair spill pool with the specified size.

+

This pool works best when you know beforehand the query has multiple spillable +operators that will likely all need to spill. Sometimes it will cause spills +even when there was sufficient memory (reserved for other operators) to avoid +doing so:

+
┌───────────────────────z──────────────────────z───────────────┐
+│                       z                      z               │
+│                       z                      z               │
+│       Spillable       z       Unspillable    z     Free      │
+│        Memory         z        Memory        z    Memory     │
+│                       z                      z               │
+│                       z                      z               │
+└───────────────────────z──────────────────────z───────────────┘
+
+
+
+
Parameters:
+

size – Size of the memory pool in bytes.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_fair_spill_pool(1024)
+
+
+
+ +
+
+with_greedy_memory_pool(size: int) RuntimeEnvBuilder
+

Use a greedy memory pool with the specified size.

+

This pool works well for queries that do not need to spill or have a single +spillable operator. See with_fair_spill_pool() if there are +multiple spillable operators that all will spill.

+
+
Parameters:
+

size – Size of the memory pool in bytes.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_greedy_memory_pool(1024)
+
+
+
+ +
+
+with_temp_file_path(path: str | pathlib.Path) RuntimeEnvBuilder
+

Use the specified path to create any needed temporary files.

+
+
Parameters:
+

path – Path to use for temporary files.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+

Example usage:

+
config = RuntimeEnvBuilder().with_temp_file_path("/tmp")
+
+
+
+ +
+
+with_unbounded_memory_pool() RuntimeEnvBuilder
+

Use an unbounded memory pool.

+
+
Returns:
+

A new RuntimeEnvBuilder object with the updated setting.

+
+
+
+ +
+
+config_internal
+
+ +
+ +
+
+class datafusion.SQLOptions
+

Options to be used when performing SQL queries.

+

Create a new SQLOptions with default values.

+

The default values are: +- DDL commands are allowed +- DML commands are allowed +- Statements are allowed

+
+
+with_allow_ddl(allow: bool = True) SQLOptions
+

Should DDL (Data Definition Language) commands be run?

+

Examples of DDL commands include CREATE TABLE and DROP TABLE.

+
+
Parameters:
+

allow – Allow DDL commands to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
+

Example usage:

+
options = SQLOptions().with_allow_ddl(True)
+
+
+
+ +
+
+with_allow_dml(allow: bool = True) SQLOptions
+

Should DML (Data Manipulation Language) commands be run?

+

Examples of DML commands include INSERT INTO and DELETE.

+
+
Parameters:
+

allow – Allow DML commands to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
+

Example usage:

+
options = SQLOptions().with_allow_dml(True)
+
+
+
+ +
+
+with_allow_statements(allow: bool = True) SQLOptions
+

Should statements such as SET VARIABLE and BEGIN TRANSACTION be run?

+
+
Parameters:
+

allow – Allow statements to be run.

+
+
Returns:
+

A new SQLOptions object with the updated setting.

+
+
Return type:
+

SQLOptions

+
+
+

Example usage:

+
options = SQLOptions().with_allow_statements(True)
+
+
+
+ +
+
+options_internal
+
+ +
+ +
+
+class datafusion.ScalarUDF(name: str, func: collections.abc.Callable[Ellipsis, _R], input_fields: list[pyarrow.Field], return_field: _R, volatility: Volatility | str)
+

Class for performing scalar user-defined functions (UDF).

+

Scalar UDFs operate on a row by row basis. See also AggregateUDF for +operating on a group of rows.

+

Instantiate a scalar user-defined function (UDF).

+

See helper method udf() for argument details.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Scalar UDF.

+
+ +
+
+static from_pycapsule(func: ScalarUDFExportable) ScalarUDF
+

Create a Scalar UDF from ScalarUDF PyCapsule object.

+

This function will instantiate a Scalar UDF that uses a DataFusion +ScalarUDF that is exported via the FFI bindings.

+
+ +
+
+static udf(input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, ScalarUDF]
+
+static udf(func: collections.abc.Callable[Ellipsis, _R], input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) ScalarUDF
+
+static udf(func: ScalarUDFExportable) ScalarUDF
+

Create a new User-Defined Function (UDF).

+

This method can be used either as a function or as a decorator.

+
+
Usage:
    +
  • As a function: udf(func, input_fields, return_field, volatility, name).

  • +
  • As a decorator: @udf(input_fields, return_field, volatility, name). +When used as a decorator, do not pass func explicitly.

  • +
+
+
+

In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. +When you do so, it will be assumed that the nullability of the inputs and +output are True and that they have no metadata.

+
+
Parameters:
+
    +
  • func (Callable, optional) – Only needed when calling as a function. +Skip this argument when using udf as a decorator. If you have a Rust +backed ScalarUDF within a PyCapsule, you can pass this parameter +and ignore the rest. They will be determined directly from the +underlying function. See the online documentation for more information.

  • +
  • input_fields (list[pa.Field | pa.DataType]) – The data types or Fields +of the arguments to func. This list must be of the same length +as the number of arguments.

  • +
  • return_field (_R) – The field of the return value from the function.

  • +
  • volatility (Volatility | str) – See Volatility for allowed values.

  • +
  • name (Optional[str]) – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined function that can be used in SQL expressions, +data aggregation, or window function calls.

+
+
+

Example: Using udf as a function:

+
def double_func(x):
+    return x * 2
+double_udf = udf(double_func, [pa.int32()], pa.int32(),
+"volatile", "double_it")
+
+
+

Example: Using udf as a decorator:

+
@udf([pa.int32()], pa.int32(), "volatile", "double_it")
+def double_udf(x):
+    return x * 2
+
+
+
+ +
+
+_udf
+
+ +
+ +
+
+class datafusion.SessionConfig(config_options: dict[str, str] | None = None)
+

Session configuration options.

+

Create a new SessionConfig with the given configuration options.

+
+
Parameters:
+

config_options – Configuration options.

+
+
+
+
+set(key: str, value: str) SessionConfig
+

Set a configuration option.

+

Parameters: +key – Option key. +value – Option value.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_batch_size(batch_size: int) SessionConfig
+

Customize batch size.

+
+
Parameters:
+

batch_size – Batch size.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_create_default_catalog_and_schema(enabled: bool = True) SessionConfig
+

Control if the default catalog and schema will be automatically created.

+
+
Parameters:
+

enabled – Whether the default catalog and schema will be +automatically created.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_default_catalog_and_schema(catalog: str, schema: str) SessionConfig
+

Select a name for the default catalog and schema.

+
+
Parameters:
+
    +
  • catalog – Catalog name.

  • +
  • schema – Schema name.

  • +
+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_information_schema(enabled: bool = True) SessionConfig
+

Enable or disable the inclusion of information_schema virtual tables.

+
+
Parameters:
+

enabled – Whether to include information_schema virtual tables.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_parquet_pruning(enabled: bool = True) SessionConfig
+

Enable or disable the use of pruning predicate for parquet readers.

+

Pruning predicates will enable the reader to skip row groups.

+
+
Parameters:
+

enabled – Whether to use pruning predicate for parquet readers.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_aggregations(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for aggregations.

+

Enabling this improves parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for aggregations.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_file_min_size(size: int) SessionConfig
+

Set minimum file range size for repartitioning scans.

+
+
Parameters:
+

size – Minimum file range size.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_file_scans(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for file scans.

+
+
Parameters:
+

enabled – Whether to use repartitioning for file scans.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_joins(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for joins to improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for joins.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_sorts(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for sorts.

+

This may improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for sorts.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_repartition_windows(enabled: bool = True) SessionConfig
+

Enable or disable the use of repartitioning for window functions.

+

This may improve parallelism.

+
+
Parameters:
+

enabled – Whether to use repartitioning for window functions.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+with_target_partitions(target_partitions: int) SessionConfig
+

Customize the number of target partitions for query execution.

+

Increasing partitions can increase concurrency.

+
+
Parameters:
+

target_partitions – Number of target partitions.

+
+
Returns:
+

A new SessionConfig object with the updated setting.

+
+
+
+ +
+
+config_internal
+
+ +
+ +
+
+class datafusion.Table(table: Table | datafusion.context.TableProviderExportable | datafusion.DataFrame | pyarrow.dataset.Dataset, ctx: datafusion.SessionContext | None = None)
+

A DataFusion table.

+

Internally we currently support the following types of tables:

+
    +
  • Tables created using built-in DataFusion methods, such as +reading from CSV or Parquet

  • +
  • pyarrow datasets

  • +
  • DataFusion DataFrames, which will be converted into a view

  • +
  • Externally provided tables implemented with the FFI PyCapsule +interface (advanced)

  • +
+

Constructor.

+
+
+__repr__() str
+

Print a string representation of the table.

+
+ +
+
+static from_dataset(dataset: pyarrow.dataset.Dataset) Table
+

Turn a pyarrow.dataset Dataset into a Table.

+
+ +
+
+__slots__ = ('_inner',)
+
+ +
+
+_inner
+
+ +
+
+property kind: str
+

Returns the kind of table.

+
+ +
+
+property schema: pyarrow.Schema
+

Returns the schema associated with this table.

+
+ +
+ +
+
+class datafusion.TableFunction(name: str, func: collections.abc.Callable[[], any], ctx: datafusion.SessionContext | None = None)
+

Class for performing user-defined table functions (UDTF).

+

Table functions generate new table providers based on the +input expressions.

+

Instantiate a user-defined table function (UDTF).

+

See udtf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) Any
+

Execute the UDTF and return a table provider.

+
+ +
+
+__repr__() str
+

User printable representation.

+
+ +
+
+static _create_table_udf(func: collections.abc.Callable[Ellipsis, Any], name: str) TableFunction
+

Create a TableFunction instance from function arguments.

+
+ +
+
+static _create_table_udf_decorator(name: str | None = None) collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]]
+

Create a decorator for a TableFunction.

+
+ +
+
+static udtf(name: str) collections.abc.Callable[Ellipsis, Any]
+
+static udtf(func: collections.abc.Callable[[], Any], name: str) TableFunction
+

Create a new User-Defined Table Function (UDTF).

+
+ +
+
+_udtf
+
+ +
+ +
+
+class datafusion.WindowFrame(units: str, start_bound: Any | None, end_bound: Any | None)
+

Defines a window frame for performing window operations.

+

Construct a window frame using the given parameters.

+
+
Parameters:
+
    +
  • units – Should be one of rows, range, or groups.

  • +
  • start_bound – Sets the preceding bound. Must be >= 0. If none, this +will be set to unbounded. If unit type is groups, this +parameter must be set.

  • +
  • end_bound – Sets the following bound. Must be >= 0. If none, this +will be set to unbounded. If unit type is groups, this +parameter must be set.

  • +
+
+
+
+
+__repr__() str
+

Print a string representation of the window frame.

+
+ +
+
+get_frame_units() str
+

Returns the window frame units for the bounds.

+
+ +
+
+get_lower_bound() WindowFrameBound
+

Returns starting bound.

+
+ +
+
+get_upper_bound() WindowFrameBound
+

Returns end bound.

+
+ +
+
+window_frame
+
+ +
+ +
+
+class datafusion.WindowUDF(name: str, func: collections.abc.Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str)
+

Class for performing window user-defined functions (UDF).

+

Window UDFs operate on a partition of rows. See +also ScalarUDF for operating on a row by row basis.

+

Instantiate a user-defined window function (UDWF).

+

See udwf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDWF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Window UDF.

+
+ +
+
+static _create_window_udf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) WindowUDF
+

Create a WindowUDF instance from function arguments.

+
+ +
+
+static _create_window_udf_decorator(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]]
+

Create a decorator for a WindowUDF.

+
+ +
+
+static _get_default_name(func: collections.abc.Callable) str
+

Get the default name for a function based on its attributes.

+
+ +
+
+static _normalize_input_types(input_types: pyarrow.DataType | list[pyarrow.DataType]) list[pyarrow.DataType]
+

Convert a single DataType to a list if needed.

+
+ +
+
+static from_pycapsule(func: WindowUDFExportable) WindowUDF
+

Create a Window UDF from WindowUDF PyCapsule object.

+

This function will instantiate a Window UDF that uses a DataFusion +WindowUDF that is exported via the FFI bindings.

+
+ +
+
+static udwf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, WindowUDF]
+
+static udwf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) WindowUDF
+

Create a new User-Defined Window Function (UDWF).

+

This method can be used either as a function or as a decorator.

+
+
Usage:
    +
  • As a function: udwf(func, input_types, return_type, volatility, name).

  • +
  • As a decorator: @udwf(input_types, return_type, volatility, name). +When using udwf as a decorator, do not pass func explicitly.

  • +
+
+
+

Function example:

+
import pyarrow as pa
+
+class BiasedNumbers(WindowEvaluator):
+    def __init__(self, start: int = 0) -> None:
+        self.start = start
+
+    def evaluate_all(self, values: list[pa.Array],
+        num_rows: int) -> pa.Array:
+        return pa.array([self.start + i for i in range(num_rows)])
+
+def bias_10() -> BiasedNumbers:
+    return BiasedNumbers(10)
+
+udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable")
+udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
+udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable")
+
+
+

Decorator example:

+
@udwf(pa.int64(), pa.int64(), "immutable")
+def biased_numbers() -> BiasedNumbers:
+    return BiasedNumbers(10)
+
+
+
+
Parameters:
+
    +
  • func – Only needed when calling as a function. Skip this argument when +using udwf as a decorator. If you have a Rust backed WindowUDF +within a PyCapsule, you can pass this parameter and ignore the rest. +They will be determined directly from the underlying function. See +the online documentation for more information.

  • +
  • input_types – The data types of the arguments.

  • +
  • return_type – The data type of the return value.

  • +
  • volatility – See Volatility for allowed values.

  • +
  • name – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined window function that can be used in window function calls.

+
+
+
+ +
+
+_udwf
+
+ +
+ +
+
+datafusion.configure_formatter(**kwargs: Any) None
+

Configure the global DataFrame HTML formatter.

+

This function creates a new formatter with the provided configuration +and sets it as the global formatter for all DataFrames.

+
+
Parameters:
+

**kwargs – Formatter configuration parameters like max_cell_length, +max_width, max_height, enable_cell_expansion, etc.

+
+
Raises:
+

ValueError – If any invalid parameters are provided

+
+
+

Example

+
>>> from datafusion.html_formatter import configure_formatter
+>>> configure_formatter(
+...     max_cell_length=50,
+...     max_height=500,
+...     enable_cell_expansion=True,
+...     use_shared_styles=True
+... )
+
+
+
+ +
+
+datafusion.lit(value: Any) expr.Expr
+

Create a literal expression.

+
+ +
+
+datafusion.literal(value: Any) expr.Expr
+

Create a literal expression.

+
+ +
+
+datafusion.read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') datafusion.dataframe.DataFrame
+

Create a DataFrame for reading an Avro data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the Avro file.

  • +
  • schema – The data source schema.

  • +
  • file_partition_cols – Partition columns.

  • +
  • file_extension – File extension to select.

  • +
+
+
Returns:
+

DataFrame representation of the read Avro file

+
+
+
+ +
+
+datafusion.read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = 1000, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) datafusion.dataframe.DataFrame
+

Read a CSV data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the CSV file

  • +
  • schema – An optional schema representing the CSV files. If None, the +CSV reader will try to infer it based on data in file.

  • +
  • has_header – Whether the CSV file has a header. If schema inference +is run on a file with no headers, default column names are +created.

  • +
  • delimiter – An optional column delimiter.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
  • options – Set advanced options for CSV reading. This cannot be +combined with any of the other options in this method.

  • +
+
+
Returns:
+

DataFrame representation of the read CSV files

+
+
+
+ +
+
+datafusion.read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) datafusion.dataframe.DataFrame
+

Read a line-delimited JSON data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the JSON file.

  • +
  • schema – The data source schema.

  • +
  • schema_infer_max_records – Maximum number of rows to read from JSON +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
+
+
Returns:
+

DataFrame representation of the read JSON files.

+
+
+
+ +
+
+datafusion.read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: list[list[datafusion.expr.Expr]] | None = None) datafusion.dataframe.DataFrame
+

Read a Parquet source into a Dataframe.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the Parquet file.

  • +
  • table_partition_cols – Partition columns.

  • +
  • parquet_pruning – Whether the parquet reader should use the predicate +to prune row groups.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • skip_metadata – Whether the parquet reader should skip any metadata +that may be in the file schema. This can help avoid schema +conflicts due to metadata.

  • +
  • schema – An optional schema representing the parquet files. If None, +the parquet reader will try to infer it based on data in the +file.

  • +
  • file_sort_order – Sort order for the file.

  • +
+
+
Returns:
+

DataFrame representation of the read Parquet files

+
+
+
+ +
+
+datafusion.DFSchema
+
+ +
+
+datafusion.col: Col
+
+ +
+
+datafusion.column: Col
+
+ +
+
+datafusion.udaf
+
+ +
+
+datafusion.udf
+
+ +
+
+datafusion.udtf
+
+ +
+
+datafusion.udwf
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/input/base/index.html b/autoapi/datafusion/input/base/index.html new file mode 100644 index 000000000..0a4a58427 --- /dev/null +++ b/autoapi/datafusion/input/base/index.html @@ -0,0 +1,580 @@ + + + + + + + + datafusion.input.base — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.input.base

+

This module provides BaseInputSource.

+

A user can extend this to provide a custom input source.

+
+

Classes

+ + + + + + +

BaseInputSource

Base Input Source class.

+
+
+

Module Contents

+
+
+class datafusion.input.base.BaseInputSource
+

Bases: abc.ABC

+

Base Input Source class.

+

If a consuming library would like to provide its own InputSource, this is +the class it should extend to write its own.

+

Once completed the Plugin InputSource can be registered with the +SessionContext to ensure that it will be used in order +to obtain the SqlTable information from the custom datasource.

+
+
+abstract build_table(input_item: Any, table_name: str, **kwarg: Any) datafusion.common.SqlTable
+

Create a table from the input source.

+
+ +
+
+abstract is_correct_input(input_item: Any, table_name: str, **kwargs: Any) bool
+

Returns True if the input is valid.

+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/input/index.html b/autoapi/datafusion/input/index.html new file mode 100644 index 000000000..ea03669e9 --- /dev/null +++ b/autoapi/datafusion/input/index.html @@ -0,0 +1,590 @@ + + + + + + + + datafusion.input — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.input

+

This package provides for input sources.

+

The primary class used within DataFusion is LocationInputPlugin.

+
+

Submodules

+ +
+
+

Classes

+ + + + + + +

LocationInputPlugin

Input Plugin for everything.

+
+
+

Package Contents

+
+
+class datafusion.input.LocationInputPlugin
+

Bases: datafusion.input.base.BaseInputSource

+

Input Plugin for everything.

+

This can be read in from a file (on disk, remote etc.).

+
+
+build_table(input_item: str, table_name: str, **kwargs: Any) datafusion.common.SqlTable
+

Create a table from the input source.

+
+ +
+
+is_correct_input(input_item: Any, table_name: str, **kwargs: Any) bool
+

Returns True if the input is valid.

+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/input/location/index.html b/autoapi/datafusion/input/location/index.html new file mode 100644 index 000000000..29dc97743 --- /dev/null +++ b/autoapi/datafusion/input/location/index.html @@ -0,0 +1,575 @@ + + + + + + + + datafusion.input.location — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.input.location

+

The default input source for DataFusion.

+
+

Classes

+ + + + + + +

LocationInputPlugin

Input Plugin for everything.

+
+
+

Module Contents

+
+
+class datafusion.input.location.LocationInputPlugin
+

Bases: datafusion.input.base.BaseInputSource

+

Input Plugin for everything.

+

This can be read in from a file (on disk, remote etc.).

+
+
+build_table(input_item: str, table_name: str, **kwargs: Any) datafusion.common.SqlTable
+

Create a table from the input source.

+
+ +
+
+is_correct_input(input_item: Any, table_name: str, **kwargs: Any) bool
+

Returns True if the input is valid.

+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/io/index.html b/autoapi/datafusion/io/index.html new file mode 100644 index 000000000..ce57fb983 --- /dev/null +++ b/autoapi/datafusion/io/index.html @@ -0,0 +1,683 @@ + + + + + + + + datafusion.io — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.io

+

IO read functions using global context.

+
+

Functions

+ + + + + + + + + + + + + + + +

read_avro(→ datafusion.dataframe.DataFrame)

Create a DataFrame for reading an Avro data source.

read_csv(→ datafusion.dataframe.DataFrame)

Read a CSV data source.

read_json(→ datafusion.dataframe.DataFrame)

Read a line-delimited JSON data source.

read_parquet(→ datafusion.dataframe.DataFrame)

Read a Parquet source into a Dataframe.

+
+
+

Module Contents

+
+
+datafusion.io.read_avro(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, file_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_extension: str = '.avro') datafusion.dataframe.DataFrame
+

Create a DataFrame for reading an Avro data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the Avro file.

  • +
  • schema – The data source schema.

  • +
  • file_partition_cols – Partition columns.

  • +
  • file_extension – File extension to select.

  • +
+
+
Returns:
+

DataFrame representation of the read Avro file

+
+
+
+ +
+
+datafusion.io.read_csv(path: str | pathlib.Path | list[str] | list[pathlib.Path], schema: pyarrow.Schema | None = None, has_header: bool = True, delimiter: str = ',', schema_infer_max_records: int = 1000, file_extension: str = '.csv', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None, options: datafusion.options.CsvReadOptions | None = None) datafusion.dataframe.DataFrame
+

Read a CSV data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the CSV file

  • +
  • schema – An optional schema representing the CSV files. If None, the +CSV reader will try to infer it based on data in file.

  • +
  • has_header – Whether the CSV file has a header. If schema inference +is run on a file with no headers, default column names are +created.

  • +
  • delimiter – An optional column delimiter.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
  • options – Set advanced options for CSV reading. This cannot be +combined with any of the other options in this method.

  • +
+
+
Returns:
+

DataFrame representation of the read CSV files

+
+
+
+ +
+
+datafusion.io.read_json(path: str | pathlib.Path, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = '.json', table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, file_compression_type: str | None = None) datafusion.dataframe.DataFrame
+

Read a line-delimited JSON data source.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the JSON file.

  • +
  • schema – The data source schema.

  • +
  • schema_infer_max_records – Maximum number of rows to read from JSON +files for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns.

  • +
  • file_compression_type – File compression type.

  • +
+
+
Returns:
+

DataFrame representation of the read JSON files.

+
+
+
+ +
+
+datafusion.io.read_parquet(path: str | pathlib.Path, table_partition_cols: list[tuple[str, str | pyarrow.DataType]] | None = None, parquet_pruning: bool = True, file_extension: str = '.parquet', skip_metadata: bool = True, schema: pyarrow.Schema | None = None, file_sort_order: list[list[datafusion.expr.Expr]] | None = None) datafusion.dataframe.DataFrame
+

Read a Parquet source into a DataFrame.

+

This function will use the global context. Any functions or tables registered +with another context may not be accessible when used with a DataFrame created +using this function.

+
+
Parameters:
+
    +
  • path – Path to the Parquet file.

  • +
  • table_partition_cols – Partition columns.

  • +
  • parquet_pruning – Whether the parquet reader should use the predicate +to prune row groups.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • skip_metadata – Whether the parquet reader should skip any metadata +that may be in the file schema. This can help avoid schema +conflicts due to metadata.

  • +
  • schema – An optional schema representing the parquet files. If None, +the parquet reader will try to infer it based on data in the +file.

  • +
  • file_sort_order – Sort order for the file.

  • +
+
+
Returns:
+

DataFrame representation of the read Parquet files

+
+
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/object_store/index.html b/autoapi/datafusion/object_store/index.html new file mode 100644 index 000000000..8b1ad2c2d --- /dev/null +++ b/autoapi/datafusion/object_store/index.html @@ -0,0 +1,608 @@ + + + + + + + + datafusion.object_store — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.object_store

+

Object store functionality.

+
+

Attributes

+ + + + + + + + + + + + + + + + + + +

AmazonS3

GoogleCloud

Http

LocalFileSystem

MicrosoftAzure

+
+
+

Module Contents

+
+
+datafusion.object_store.AmazonS3
+
+ +
+
+datafusion.object_store.GoogleCloud
+
+ +
+
+datafusion.object_store.Http
+
+ +
+
+datafusion.object_store.LocalFileSystem
+
+ +
+
+datafusion.object_store.MicrosoftAzure
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/options/index.html b/autoapi/datafusion/options/index.html new file mode 100644 index 000000000..e436b87f5 --- /dev/null +++ b/autoapi/datafusion/options/index.html @@ -0,0 +1,1039 @@ + + + + + + + + datafusion.options — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.options

+

Options for reading various file formats.

+
+

Classes

+ + + + + + +

CsvReadOptions

Options for reading CSV files.

+
+
+

Module Contents

+
+
+class datafusion.options.CsvReadOptions(*, has_header: bool = True, delimiter: str = ',', quote: str = '"', terminator: str | None = None, escape: str | None = None, comment: str | None = None, newlines_in_values: bool = False, schema: pyarrow.Schema | None = None, schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, file_extension: str = '.csv', table_partition_cols: list[tuple[str, pyarrow.DataType]] | None = None, file_compression_type: str = '', file_sort_order: list[list[datafusion.expr.SortExpr]] | None = None, null_regex: str | None = None, truncated_rows: bool = False)
+

Options for reading CSV files.

+

This class provides a builder pattern for configuring CSV reading options. +All methods starting with with_ return self to allow method chaining.

+

Initialize CsvReadOptions.

+
+
Parameters:
+
    +
  • has_header – Does the CSV file have a header row? If schema inference +is run on a file with no headers, default column names are created.

  • +
  • delimiter – Column delimiter character. Must be a single ASCII character.

  • +
  • quote – Quote character for fields containing delimiters or newlines. +Must be a single ASCII character.

  • +
  • terminator – Optional line terminator character. If None, uses CRLF. +Must be a single ASCII character.

  • +
  • escape – Optional escape character for quotes. Must be a single ASCII +character.

  • +
  • comment – If specified, lines beginning with this character are ignored. +Must be a single ASCII character.

  • +
  • newlines_in_values – Whether newlines in quoted values are supported. +Parsing newlines in quoted values may be affected by execution +behavior such as parallel file scanning. Setting this to True +ensures that newlines in values are parsed successfully, which may +reduce performance.

  • +
  • schema – Optional PyArrow schema representing the CSV files. If None, +the CSV reader will try to infer it based on data in the file.

  • +
  • schema_infer_max_records – Maximum number of rows to read from CSV files +for schema inference if needed.

  • +
  • file_extension – File extension; only files with this extension are +selected for data input.

  • +
  • table_partition_cols – Partition columns as a list of tuples of +(column_name, data_type).

  • +
  • file_compression_type – File compression type. Supported values are +"gzip", "bz2", "xz", "zstd", or empty string for +uncompressed.

  • +
  • file_sort_order – Optional sort order of the files as a list of sort +expressions per file.

  • +
  • null_regex – Optional regex pattern to match null values in the CSV.

  • +
  • truncated_rows – Whether to allow truncated rows when parsing. By default +this is False and will error if the CSV rows have different +lengths. When set to True, it will allow records with fewer than +the expected number of columns and fill the missing columns with +nulls. If the record’s schema is not nullable, it will still return +an error.

  • +
+
+
+
+
+to_inner() datafusion._internal.options.CsvReadOptions
+

Convert this object into the underlying Rust structure.

+

This is intended for internal use only.

+
+ +
+
+with_comment(comment: str | None) CsvReadOptions
+

Configure the comment character.

+
+ +
+
+with_delimiter(delimiter: str) CsvReadOptions
+

Configure the column delimiter.

+
+ +
+
+with_escape(escape: str | None) CsvReadOptions
+

Configure the escape character.

+
+ +
+
+with_file_compression_type(file_compression_type: str) CsvReadOptions
+

Configure file compression type.

+
+ +
+
+with_file_extension(file_extension: str) CsvReadOptions
+

Configure the file extension filter.

+
+ +
+
+with_file_sort_order(file_sort_order: list[list[datafusion.expr.SortExpr]]) CsvReadOptions
+

Configure file sort order.

+
+ +
+
+with_has_header(has_header: bool) CsvReadOptions
+

Configure whether the CSV has a header row.

+
+ +
+
+with_newlines_in_values(newlines_in_values: bool) CsvReadOptions
+

Configure whether newlines in values are supported.

+
+ +
+
+with_null_regex(null_regex: str | None) CsvReadOptions
+

Configure null value regex pattern.

+
+ +
+
+with_quote(quote: str) CsvReadOptions
+

Configure the quote character.

+
+ +
+
+with_schema(schema: pyarrow.Schema | None) CsvReadOptions
+

Configure the schema.

+
+ +
+
+with_schema_infer_max_records(schema_infer_max_records: int) CsvReadOptions
+

Configure maximum records for schema inference.

+
+ +
+
+with_table_partition_cols(table_partition_cols: list[tuple[str, pyarrow.DataType]]) CsvReadOptions
+

Configure table partition columns.

+
+ +
+
+with_terminator(terminator: str | None) CsvReadOptions
+

Configure the line terminator character.

+
+ +
+
+with_truncated_rows(truncated_rows: bool) CsvReadOptions
+

Configure whether to allow truncated rows.

+
+ +
+
+comment = None
+
+ +
+
+delimiter = ','
+
+ +
+
+escape = None
+
+ +
+
+file_compression_type = ''
+
+ +
+
+file_extension = '.csv'
+
+ +
+
+file_sort_order = []
+
+ +
+
+has_header = True
+
+ +
+
+newlines_in_values = False
+
+ +
+
+null_regex = None
+
+ +
+
+quote = '"'
+
+ +
+
+schema = None
+
+ +
+
+schema_infer_max_records = 1000
+
+ +
+
+table_partition_cols = []
+
+ +
+
+terminator = None
+
+ +
+
+truncated_rows = False
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/plan/index.html b/autoapi/datafusion/plan/index.html new file mode 100644 index 000000000..07cc3fdf6 --- /dev/null +++ b/autoapi/datafusion/plan/index.html @@ -0,0 +1,864 @@ + + + + + + + + datafusion.plan — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.plan

+

This module supports physical and logical plans in DataFusion.

+
+

Classes

+ + + + + + + + + +

ExecutionPlan

Represent nodes in the DataFusion Physical Plan.

LogicalPlan

Logical Plan.

+
+
+

Module Contents

+
+
+class datafusion.plan.ExecutionPlan(plan: datafusion._internal.ExecutionPlan)
+

Represent nodes in the DataFusion Physical Plan.

+

This constructor should not be called by the end user.

+
+
+__repr__() str
+

Print a string representation of the physical plan.

+
+ +
+
+children() list[ExecutionPlan]
+

Get a list of children ExecutionPlan that act as inputs to this plan.

+

The returned list will be empty for leaf nodes such as scans, will contain a +single value for unary nodes, or two values for binary nodes (such as joins).

+
+ +
+
+display() str
+

Print the physical plan.

+
+ +
+
+display_indent() str
+

Print an indented form of the physical plan.

+
+ +
+
+static from_proto(ctx: datafusion.context.SessionContext, data: bytes) ExecutionPlan
+

Create an ExecutionPlan from protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+to_proto() bytes
+

Convert an ExecutionPlan into protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+_raw_plan
+
+ +
+
+property partition_count: int
+

Returns the number of partitions in the physical plan.

+
+ +
+ +
+
+class datafusion.plan.LogicalPlan(plan: datafusion._internal.LogicalPlan)
+

Logical Plan.

+

A LogicalPlan is a node in a tree of relational operators (such as +Projection or Filter).

+

Represents transforming an input relation (table) to an output relation +(table) with a potentially different schema. Plans form a dataflow tree +where data flows from leaves up to the root to produce the query result.

+

A LogicalPlan can be created by the SQL query planner, the DataFrame API, +or programmatically (for example custom query languages).

+

This constructor should not be called by the end user.

+
+
+__eq__(other: LogicalPlan) bool
+

Test equality.

+
+ +
+
+__repr__() str
+

Generate a printable representation of the plan.

+
+ +
+
+display() str
+

Print the logical plan.

+
+ +
+
+display_graphviz() str
+

Print the graph visualization of the logical plan.

+

Returns a formattable structure that produces lines meant for graphical display +using the DOT language. This format can be visualized using software from +graphviz (https://graphviz.org/).

+
+ +
+
+display_indent() str
+

Print an indented form of the logical plan.

+
+ +
+
+display_indent_schema() str
+

Print an indented form of the schema for the logical plan.

+
+ +
+
+static from_proto(ctx: datafusion.context.SessionContext, data: bytes) LogicalPlan
+

Create a LogicalPlan from protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+inputs() list[LogicalPlan]
+

Returns the list of inputs to the logical plan.

+
+ +
+
+to_proto() bytes
+

Convert a LogicalPlan to protobuf bytes.

+

Tables created in memory from record batches are currently not supported.

+
+ +
+
+to_variant() Any
+

Convert the logical plan into its specific variant.

+
+ +
+
+_raw_plan
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/record_batch/index.html b/autoapi/datafusion/record_batch/index.html new file mode 100644 index 000000000..473630d11 --- /dev/null +++ b/autoapi/datafusion/record_batch/index.html @@ -0,0 +1,716 @@ + + + + + + + + datafusion.record_batch — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.record_batch

+

This module provides the classes for handling record batches.

+

These are typically the result of dataframe +datafusion.dataframe.execute_stream() operations.

+
+

Classes

+ + + + + + + + + +

RecordBatch

This class is essentially a wrapper for pa.RecordBatch.

RecordBatchStream

This class represents a stream of record batches.

+
+
+

Module Contents

+
+
+class datafusion.record_batch.RecordBatch(record_batch: datafusion._internal.RecordBatch)
+

This class is essentially a wrapper for pa.RecordBatch.

+

This constructor is generally not called by the end user.

+

See the RecordBatchStream iterator for generating this class.

+
+
+__arrow_c_array__(requested_schema: object | None = None) tuple[object, object]
+

Export the record batch via the Arrow C Data Interface.

+

This allows zero-copy interchange with libraries that support the +Arrow PyCapsule interface.

+
+
Parameters:
+

requested_schema – Attempt to provide the record batch using this +schema. Only straightforward projections such as column +selection or reordering are applied.

+
+
Returns:
+

Two Arrow PyCapsule objects representing the ArrowArray and +ArrowSchema.

+
+
+
+ +
+
+to_pyarrow() pyarrow.RecordBatch
+

Convert to pa.RecordBatch.

+
+ +
+
+record_batch
+
+ +
+ +
+
+class datafusion.record_batch.RecordBatchStream(record_batch_stream: datafusion._internal.RecordBatchStream)
+

This class represents a stream of record batches.

+

These are typically the result of an +execute_stream() operation.

+

This constructor is typically not called by the end user.

+
+
+__aiter__() typing_extensions.Self
+

Return an asynchronous iterator over record batches.

+
+ +
+
+async __anext__() RecordBatch
+

Return the next RecordBatch in the stream asynchronously.

+
+ +
+
+__iter__() typing_extensions.Self
+

Return an iterator over record batches.

+
+ +
+
+__next__() RecordBatch
+

Return the next RecordBatch in the stream.

+
+ +
+
+next() RecordBatch
+

See __next__() for the iterator function.

+
+ +
+
+rbs
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/substrait/index.html b/autoapi/datafusion/substrait/index.html new file mode 100644 index 000000000..52aeeca59 --- /dev/null +++ b/autoapi/datafusion/substrait/index.html @@ -0,0 +1,857 @@ + + + + + + + + datafusion.substrait — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.substrait

+

This module provides support for using substrait with datafusion.

+

For additional information about substrait, see https://substrait.io/.

+
+

Classes

+ + + + + + + + + + + + + + + +

Consumer

Generates a logical plan from a substrait plan.

Plan

A class representing an encodable substrait plan.

Producer

Generates substrait plans from a logical plan.

Serde

Provides the Substrait serialization and deserialization.

+
+
+

Module Contents

+
+
+class datafusion.substrait.Consumer
+

Generates a logical plan from a substrait plan.

+
+
+static from_substrait_plan(ctx: datafusion.context.SessionContext, plan: Plan) datafusion.plan.LogicalPlan
+

Convert a Substrait plan to a DataFusion LogicalPlan.

+
+
Parameters:
+
    +
  • ctx – SessionContext to use.

  • +
  • plan – Substrait plan to convert.

  • +
+
+
Returns:
+

LogicalPlan.

+
+
+
+ +
+ +
+
+class datafusion.substrait.Plan(plan: datafusion._internal.substrait.Plan)
+

A class representing an encodable substrait plan.

+

Create a substrait plan.

+

The user should not have to call this constructor directly. Rather, it +should be created via the Serde or Producer classes +in this module.

+
+
+encode() bytes
+

Encode the plan to bytes.

+
+
Returns:
+

Encoded plan.

+
+
+
+ +
+
+static from_json(json: str) Plan
+

Parse a plan from a JSON string representation.

+
+
Parameters:
+

json – JSON representation of a Substrait plan.

+
+
Returns:
+

Plan object representing the Substrait plan.

+
+
+
+ +
+
+to_json() str
+

Get the JSON representation of the Substrait plan.

+
+
Returns:
+

A JSON representation of the Substrait plan.

+
+
+
+ +
+
+plan_internal
+
+ +
+ +
+
+class datafusion.substrait.Producer
+

Generates substrait plans from a logical plan.

+
+
+static to_substrait_plan(logical_plan: datafusion.plan.LogicalPlan, ctx: datafusion.context.SessionContext) Plan
+

Convert a DataFusion LogicalPlan to a Substrait plan.

+
+
Parameters:
+
    +
  • logical_plan – LogicalPlan to convert.

  • +
  • ctx – SessionContext to use.

  • +
+
+
Returns:
+

Substrait plan.

+
+
+
+ +
+ +
+
+class datafusion.substrait.Serde
+

Provides the Substrait serialization and deserialization.

+
+
+static deserialize(path: str | pathlib.Path) Plan
+

Deserialize a Substrait plan from a file.

+
+
Parameters:
+

path – Path to read the Substrait plan from.

+
+
Returns:
+

Substrait plan.

+
+
+
+ +
+
+static deserialize_bytes(proto_bytes: bytes) Plan
+

Deserialize a Substrait plan from bytes.

+
+
Parameters:
+

proto_bytes – Bytes to read the Substrait plan from.

+
+
Returns:
+

Substrait plan.

+
+
+
+ +
+
+static serialize(sql: str, ctx: datafusion.context.SessionContext, path: str | pathlib.Path) None
+

Serialize a SQL query to a Substrait plan and write it to a file.

+
+
Parameters:
+
    +
  • sql – SQL query to serialize.

  • +
  • ctx – SessionContext to use.

  • +
  • path – Path to write the Substrait plan to.

  • +
+
+
+
+ +
+
+static serialize_bytes(sql: str, ctx: datafusion.context.SessionContext) bytes
+

Serialize a SQL query to a Substrait plan as bytes.

+
+
Parameters:
+
    +
  • sql – SQL query to serialize.

  • +
  • ctx – SessionContext to use.

  • +
+
+
Returns:
+

Substrait plan as bytes.

+
+
+
+ +
+
+static serialize_to_plan(sql: str, ctx: datafusion.context.SessionContext) Plan
+

Serialize a SQL query to a Substrait plan.

+

Parameters: +sql – SQL query to serialize. +ctx – SessionContext to use.

+
+
Returns:
+

Substrait plan.

+
+
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/unparser/index.html b/autoapi/datafusion/unparser/index.html new file mode 100644 index 000000000..f88e76e22 --- /dev/null +++ b/autoapi/datafusion/unparser/index.html @@ -0,0 +1,699 @@ + + + + + + + + datafusion.unparser — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

datafusion.unparser

+

This module provides support for unparsing datafusion plans to SQL.

+

For additional information about unparsing, see https://docs.rs/datafusion-sql/latest/datafusion_sql/unparser/index.html

+
+

Classes

+ + + + + + + + + +

Dialect

DataFusion unparser dialect.

Unparser

DataFusion unparser.

+
+
+

Module Contents

+
+
+class datafusion.unparser.Dialect(dialect: datafusion._internal.unparser.Dialect)
+

DataFusion unparser dialect.

+

This constructor is not typically called by the end user.

+
+
+static default() Dialect
+

Create a new default dialect.

+
+ +
+
+static duckdb() Dialect
+

Create a new DuckDB dialect.

+
+ +
+
+static mysql() Dialect
+

Create a new MySQL dialect.

+
+ +
+
+static postgres() Dialect
+

Create a new PostgreSQL dialect.

+
+ +
+
+static sqlite() Dialect
+

Create a new SQLite dialect.

+
+ +
+
+dialect
+
+ +
+ +
+
+class datafusion.unparser.Unparser(dialect: Dialect)
+

DataFusion unparser.

+

This constructor is not typically called by the end user.

+
+
+plan_to_sql(plan: datafusion.plan.LogicalPlan) str
+

Convert a logical plan to a SQL string.

+
+ +
+
+with_pretty(pretty: bool) Unparser
+

Set the pretty flag.

+
+ +
+
+unparser
+
+ +
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/datafusion/user_defined/index.html b/autoapi/datafusion/user_defined/index.html new file mode 100644 index 000000000..4dbf00e96 --- /dev/null +++ b/autoapi/datafusion/user_defined/index.html @@ -0,0 +1,1916 @@ + + + + + + + + datafusion.user_defined — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

datafusion.user_defined

+

Provides the user-defined functions for evaluation of dataframes.

+
+

Attributes

+ + + + + + + + + + + + + + + + + + +

_R

udaf

udf

udtf

udwf

+
+
+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Accumulator

Defines how an AggregateUDF accumulates values.

AggregateUDF

Class for performing aggregate user-defined functions (UDAF).

AggregateUDFExportable

Type hint for object that has __datafusion_aggregate_udf__ PyCapsule.

ScalarUDF

Class for performing scalar user-defined functions (UDF).

ScalarUDFExportable

Type hint for object that has __datafusion_scalar_udf__ PyCapsule.

TableFunction

Class for performing user-defined table functions (UDTF).

Volatility

Defines how stable or volatile a function is.

WindowEvaluator

Evaluator class for user-defined window functions (UDWF).

WindowUDF

Class for performing window user-defined functions (UDF).

WindowUDFExportable

Type hint for object that has __datafusion_window_udf__ PyCapsule.

+
+
+

Functions

+ + + + + + + + + + + + +

_is_pycapsule(→ TypeGuard[_typeshed.CapsuleType])

Return True when value is a CPython PyCapsule.

data_type_or_field_to_field(→ pyarrow.Field)

Helper function to return a Field from either a Field or DataType.

data_types_or_fields_to_field_list(→ list[pyarrow.Field])

Helper function to return a list of Fields.

+
+
+

Module Contents

+
+
+class datafusion.user_defined.Accumulator
+

Defines how an AggregateUDF accumulates values.

+
+
+abstract evaluate() pyarrow.Scalar
+

Return the resultant value.

+

While this function template expects a PyArrow Scalar value return type, +you can return any value that can be converted into a Scalar. This +includes basic Python data types such as integers and strings. In +addition to primitive types, we currently support PyArrow, nanoarrow, +and arro3 objects. Other objects +that support the Arrow FFI standard will be given a “best attempt” at +conversion to scalar objects.

+
+ +
+
+abstract merge(states: list[pyarrow.Array]) None
+

Merge a set of states.

+
+ +
+
+abstract state() list[pyarrow.Scalar]
+

Return the current state.

+

While this function template expects PyArrow Scalar values as the return type, +you can return any value that can be converted into a Scalar. This +includes basic Python data types such as integers and strings. In +addition to primitive types, we currently support PyArrow, nanoarrow, +and arro3 objects. Other objects +that support the Arrow FFI standard will be given a “best attempt” at +conversion to scalar objects.

+
+ +
+
+abstract update(*values: pyarrow.Array) None
+

Evaluate an array of values and update state.

+
+ +
+ +
+
+class datafusion.user_defined.AggregateUDF(name: str, accumulator: collections.abc.Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str)
+
+class datafusion.user_defined.AggregateUDF(name: str, accumulator: AggregateUDFExportable, input_types: None = ..., return_type: None = ..., state_type: None = ..., volatility: None = ...)
+

Class for performing aggregate user-defined functions (UDAF).

+

Aggregate UDFs operate on a group of rows and return a single value. See +also ScalarUDF for operating on a row by row basis.

+

Instantiate a user-defined aggregate function (UDAF).

+

See udaf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDAF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Aggregate UDF.

+
+ +
+
+static from_pycapsule(func: AggregateUDFExportable | _typeshed.CapsuleType) AggregateUDF
+

Create an Aggregate UDF from AggregateUDF PyCapsule object.

+

This function will instantiate an Aggregate UDF that uses a DataFusion +AggregateUDF that is exported via the FFI bindings.

+
+ +
+
+static udaf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, AggregateUDF]
+
+static udaf(accum: collections.abc.Callable[[], Accumulator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, state_type: list[pyarrow.DataType], volatility: Volatility | str, name: str | None = None) AggregateUDF
+
+static udaf(accum: AggregateUDFExportable) AggregateUDF
+
+static udaf(accum: _typeshed.CapsuleType) AggregateUDF
+

Create a new User-Defined Aggregate Function (UDAF).

+

This function allows you to define an aggregate function that can be used in +data aggregation or window function calls.

+
+
Usage:
    +
  • As a function: udaf(accum, input_types, return_type, state_type, volatility, name).

  • +
  • As a decorator: @udaf(input_types, return_type, state_type, volatility, name). +When using udaf as a decorator, do not pass accum explicitly.

  • +
+
+
+

Function example:

+

If your Accumulator can be instantiated with no arguments, you +can simply pass its type as accum. If you need to pass additional +arguments to its constructor, you can define a lambda or a factory method. +During runtime the Accumulator will be constructed for every +instance in which this UDAF is used. The following examples are all valid:

+
import pyarrow as pa
+import pyarrow.compute as pc
+
+class Summarize(Accumulator):
+    def __init__(self, bias: float = 0.0):
+        self._sum = pa.scalar(bias)
+
+    def state(self) -> list[pa.Scalar]:
+        return [self._sum]
+
+    def update(self, values: pa.Array) -> None:
+        self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
+
+    def merge(self, states: list[pa.Array]) -> None:
+        self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
+
+    def evaluate(self) -> pa.Scalar:
+        return self._sum
+
+def sum_bias_10() -> Summarize:
+    return Summarize(10.0)
+
+udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
+    "immutable")
+udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
+    "immutable")
+udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
+    [pa.float64()], "immutable")
+
+
+

Decorator example:

+
@udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
+def udf4() -> Summarize:
+    return Summarize(10.0)
+
+
+
+
Parameters:
+
    +
  • accum – The accumulator python function. Only needed when calling as a +function. Skip this argument when using udaf as a decorator. +If you have a Rust backed AggregateUDF within a PyCapsule, you can +pass this parameter and ignore the rest. They will be determined +directly from the underlying function. See the online documentation +for more information.

  • +
  • input_types – The data types of the arguments to accum.

  • +
  • return_type – The data type of the return value.

  • +
  • state_type – The data types of the intermediate accumulation.

  • +
  • volatility – See Volatility for allowed values.

  • +
  • name – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined aggregate function, which can be used in either data +aggregation or window function calls.

+
+
+
+ +
+
+_udaf
+
+ +
+ +
+
+class datafusion.user_defined.AggregateUDFExportable
+

Bases: Protocol

+

Type hint for object that has __datafusion_aggregate_udf__ PyCapsule.

+
+
+__datafusion_aggregate_udf__() object
+
+ +
+ +
+
+class datafusion.user_defined.ScalarUDF(name: str, func: collections.abc.Callable[Ellipsis, _R], input_fields: list[pyarrow.Field], return_field: _R, volatility: Volatility | str)
+

Class for performing scalar user-defined functions (UDF).

+

Scalar UDFs operate on a row by row basis. See also AggregateUDF for +operating on a group of rows.

+

Instantiate a scalar user-defined function (UDF).

+

See helper method udf() for argument details.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Scalar UDF.

+
+ +
+
+static from_pycapsule(func: ScalarUDFExportable) ScalarUDF
+

Create a Scalar UDF from ScalarUDF PyCapsule object.

+

This function will instantiate a Scalar UDF that uses a DataFusion +ScalarUDF that is exported via the FFI bindings.

+
+ +
+
+static udf(input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, ScalarUDF]
+
+static udf(func: collections.abc.Callable[Ellipsis, _R], input_fields: collections.abc.Sequence[pyarrow.DataType | pyarrow.Field] | pyarrow.DataType | pyarrow.Field, return_field: pyarrow.DataType | pyarrow.Field, volatility: Volatility | str, name: str | None = None) ScalarUDF
+
+static udf(func: ScalarUDFExportable) ScalarUDF
+

Create a new User-Defined Function (UDF).

+

This method can be used either as a function or as a decorator.

+
+
Usage:
    +
  • As a function: udf(func, input_fields, return_field, volatility, name).

  • +
  • As a decorator: @udf(input_fields, return_field, volatility, name). +When used as a decorator, do not pass func explicitly.

  • +
+
+
+

In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. +When you do so, it will be assumed that the nullability of the inputs and +output are True and that they have no metadata.

+
+
Parameters:
+
    +
  • func (Callable, optional) – Only needed when calling as a function. +Skip this argument when using udf as a decorator. If you have a Rust +backed ScalarUDF within a PyCapsule, you can pass this parameter +and ignore the rest. They will be determined directly from the +underlying function. See the online documentation for more information.

  • +
  • input_fields (list[pa.Field | pa.DataType]) – The data types or Fields +of the arguments to func. This list must be of the same length +as the number of arguments.

  • +
  • return_field (_R) – The field of the return value from the function.

  • +
  • volatility (Volatility | str) – See Volatility for allowed values.

  • +
  • name (Optional[str]) – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined function that can be used in SQL expressions, +data aggregation, or window function calls.

+
+
+

Example: Using udf as a function:

+
def double_func(x):
+    return x * 2
+double_udf = udf(double_func, [pa.int32()], pa.int32(),
+"volatile", "double_it")
+
+
+

Example: Using udf as a decorator:

+
@udf([pa.int32()], pa.int32(), "volatile", "double_it")
+def double_udf(x):
+    return x * 2
+
+
+
+ +
+
+_udf
+
+ +
+ +
+
+class datafusion.user_defined.ScalarUDFExportable
+

Bases: Protocol

+

Type hint for object that has __datafusion_scalar_udf__ PyCapsule.

+
+
+__datafusion_scalar_udf__() object
+
+ +
+ +
+
+class datafusion.user_defined.TableFunction(name: str, func: collections.abc.Callable[[], any], ctx: datafusion.SessionContext | None = None)
+

Class for performing user-defined table functions (UDTF).

+

Table functions generate new table providers based on the +input expressions.

+

Instantiate a user-defined table function (UDTF).

+

See udtf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) Any
+

Execute the UDTF and return a table provider.

+
+ +
+
+__repr__() str
+

User printable representation.

+
+ +
+
+static _create_table_udf(func: collections.abc.Callable[Ellipsis, Any], name: str) TableFunction
+

Create a TableFunction instance from function arguments.

+
+ +
+
+static _create_table_udf_decorator(name: str | None = None) collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]]
+

Create a decorator for a TableFunction.

+
+ +
+
+static udtf(name: str) collections.abc.Callable[Ellipsis, Any]
+
+static udtf(func: collections.abc.Callable[[], Any], name: str) TableFunction
+

Create a new User-Defined Table Function (UDTF).

+
+ +
+
+_udtf
+
+ +
+ +
+
+class datafusion.user_defined.Volatility
+

Bases: enum.Enum

+

Defines how stable or volatile a function is.

+

When setting the volatility of a function, you can either pass this +enumeration or a str. The str equivalent is the lower case value of the +name (“immutable”, “stable”, or “volatile”).

+
+
+__str__() str
+

Returns the string equivalent.

+
+ +
+
+Immutable = 1
+

An immutable function will always return the same output when given the +same input.

+

DataFusion will attempt to inline immutable functions during planning.

+
+ +
+
+Stable = 2
+

Returns the same value for a given input within a single query.

+

A stable function may return different values given the same input across +different queries but must return the same value for a given input within a +query. An example of this is the Now function. DataFusion will attempt to +inline Stable functions during planning, when possible. For query +select col1, now() from t1, it might take a while to execute but now() +column will be the same for each output row, which is evaluated during +planning.

+
+ +
+
+Volatile = 3
+

A volatile function may change the return value from evaluation to +evaluation.

+

Multiple invocations of a volatile function may return different results +when used in the same query. An example of this is the random() function. +DataFusion can not evaluate such functions during planning. In the query +select col1, random() from t1, random() function will be evaluated +for each output row, resulting in a unique random value for each row.

+
+ +
+ +
+
+class datafusion.user_defined.WindowEvaluator
+

Evaluator class for user-defined window functions (UDWF).

+

It is up to the user to decide which evaluate function is appropriate.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

uses_window_frame

supports_bounded_execution

include_rank

function_to_implement

False (default)

False (default)

False (default)

evaluate_all

False

True

False

evaluate

False

True/False

True

evaluate_all_with_rank

True

True/False

True/False

evaluate

+
+
+evaluate(values: list[pyarrow.Array], eval_range: tuple[int, int]) pyarrow.Scalar
+

Evaluate window function on a range of rows in an input partition.

+

This is the simplest and most general function to implement +but also the least performant as it creates output one row at +a time. It is typically much faster to implement stateful +evaluation using one of the other specialized methods on this +trait.

+

Returns a [ScalarValue] that is the value of the window +function within range for the entire partition. Argument +values contains the evaluation result of function arguments +and evaluation results of ORDER BY expressions. If function has a +single argument, values[1..] will contain ORDER BY expression results.

+
+ +
+
+evaluate_all(values: list[pyarrow.Array], num_rows: int) pyarrow.Array
+

Evaluate a window function on an entire input partition.

+

This function is called once per input partition for window functions that +do not use values from the window frame, such as +row_number(), +rank(), +dense_rank(), +percent_rank(), +cume_dist(), +lead(), +and lag().

+

It produces the result of all rows in a single pass. It +expects to receive the entire partition as the value and +must produce an output column with one output row for every +input row.

+

num_rows is required to correctly compute the output in case +len(values) == 0

+

Implementing this function is an optimization. Certain window +functions are not affected by the window frame definition or +the query doesn’t have a frame, and evaluate_all skips the +(costly) window frame boundary calculation and the overhead of +calling evaluate for each output row.

+

For example, the LAG built in window function does not use +the values of its window frame (it can be computed in one shot +on the entire partition with Self::evaluate_all regardless of the +window defined in the OVER clause)

+
lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING)
+
+
+

However, avg() computes the average in the window and thus +does use its window frame.

+
avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING)
+
+
+
+ +
+
+evaluate_all_with_rank(num_rows: int, ranks_in_partition: list[tuple[int, int]]) pyarrow.Array
+

Called for window functions that only need the rank of a row.

+

Evaluate the partition evaluator against the partition using +the row ranks. For example, rank(col("a")) produces

+
a | rank
+- + ----
+A | 1
+A | 1
+C | 3
+D | 4
+D | 4
+
+
+

For this case, num_rows would be 5 and the +ranks_in_partition would be called with

+
[
+    (0,1),
+    (2,2),
+    (3,4),
+]
+
+
+

The user must implement this method if include_rank returns True.

+
+ +
+
+get_range(idx: int, num_rows: int) tuple[int, int]
+

Return the range for the window function.

+

If the uses_window_frame flag is false, this method is used to +calculate the required range for the window function during +stateful execution.

+

Generally there is no required range, hence by default this +returns the smallest range (the current row), e.g. seeing the current row is +enough to calculate the window result (such as row_number, rank, +etc.).

+
+
Parameters:
+
    +
  • idx – Current index.

  • +
  • num_rows – Number of rows.

  • +
+
+
+
+ +
+
+include_rank() bool
+

Can this function be evaluated with (only) rank?

+
+ +
+
+is_causal() bool
+

Get whether evaluator needs future data for its result.

+
+ +
+
+memoize() None
+

Perform a memoize operation to improve performance.

+

When the window frame has a fixed beginning (e.g UNBOUNDED +PRECEDING), some functions such as FIRST_VALUE and +NTH_VALUE do not need the (unbounded) input once they have +seen a certain amount of input.

+

memoize is called after each input batch is processed, and +such functions can save whatever they need

+
+ +
+
+supports_bounded_execution() bool
+

Can the window function be incrementally computed using bounded memory?

+
+ +
+
+uses_window_frame() bool
+

Does the window function use the values from the window frame?

+
+ +
+ +
+
+class datafusion.user_defined.WindowUDF(name: str, func: collections.abc.Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str)
+

Class for performing window user-defined functions (UDF).

+

Window UDFs operate on a partition of rows. See +also ScalarUDF for operating on a row by row basis.

+

Instantiate a user-defined window function (UDWF).

+

See udwf() for a convenience function and argument +descriptions.

+
+
+__call__(*args: datafusion.expr.Expr) datafusion.expr.Expr
+

Execute the UDWF.

+

This function is not typically called by an end user. These calls will +occur during the evaluation of the dataframe.

+
+ +
+
+__repr__() str
+

Print a string representation of the Window UDF.

+
+ +
+
+static _create_window_udf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) WindowUDF
+

Create a WindowUDF instance from function arguments.

+
+ +
+
+static _create_window_udf_decorator(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[[collections.abc.Callable[[], WindowEvaluator]], collections.abc.Callable[Ellipsis, datafusion.expr.Expr]]
+

Create a decorator for a WindowUDF.

+
+ +
+
+static _get_default_name(func: collections.abc.Callable) str
+

Get the default name for a function based on its attributes.

+
+ +
+
+static _normalize_input_types(input_types: pyarrow.DataType | list[pyarrow.DataType]) list[pyarrow.DataType]
+

Convert a single DataType to a list if needed.

+
+ +
+
+static from_pycapsule(func: WindowUDFExportable) WindowUDF
+

Create a Window UDF from WindowUDF PyCapsule object.

+

This function will instantiate a Window UDF that uses a DataFusion +WindowUDF that is exported via the FFI bindings.

+
+ +
+
+static udwf(input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) collections.abc.Callable[Ellipsis, WindowUDF]
+
+static udwf(func: collections.abc.Callable[[], WindowEvaluator], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: pyarrow.DataType, volatility: Volatility | str, name: str | None = None) WindowUDF
+

Create a new User-Defined Window Function (UDWF).

+

This class can be used as either a function or a decorator.

+
+
Usage:
    +
  • As a function: udwf(func, input_types, return_type, volatility, name).

  • +
  • As a decorator: @udwf(input_types, return_type, volatility, name). +When using udwf as a decorator, do not pass func explicitly.

  • +
+
+
+

Function example:

+
import pyarrow as pa
+
+class BiasedNumbers(WindowEvaluator):
+    def __init__(self, start: int = 0) -> None:
+        self.start = start
+
+    def evaluate_all(self, values: list[pa.Array],
+        num_rows: int) -> pa.Array:
+        return pa.array([self.start + i for i in range(num_rows)])
+
+def bias_10() -> BiasedNumbers:
+    return BiasedNumbers(10)
+
+udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable")
+udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
+udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable")
+
+
+

Decorator example:

+
@udwf(pa.int64(), pa.int64(), "immutable")
+def biased_numbers() -> BiasedNumbers:
+    return BiasedNumbers(10)
+
+
+
+
Parameters:
+
    +
  • func – Only needed when calling as a function. Skip this argument when +using udwf as a decorator. If you have a Rust backed WindowUDF +within a PyCapsule, you can pass this parameter and ignore the rest. +They will be determined directly from the underlying function. See +the online documentation for more information.

  • +
  • input_types – The data types of the arguments.

  • +
  • return_type – The data type of the return value.

  • +
  • volatility – See Volatility for allowed values.

  • +
  • name – A descriptive name for the function.

  • +
+
+
Returns:
+

A user-defined window function that can be used in window function calls.

+
+
+
+ +
+
+_udwf
+
+ +
+ +
+
+class datafusion.user_defined.WindowUDFExportable
+

Bases: Protocol

+

Type hint for object that has __datafusion_window_udf__ PyCapsule.

+
+
+__datafusion_window_udf__() object
+
+ +
+ +
+
+datafusion.user_defined._is_pycapsule(value: object) TypeGuard[_typeshed.CapsuleType]
+

Return True when value is a CPython PyCapsule.

+
+ +
+
+datafusion.user_defined.data_type_or_field_to_field(value: pyarrow.DataType | pyarrow.Field, name: str) pyarrow.Field
+

Helper function to return a Field from either a Field or DataType.

+
+ +
+
+datafusion.user_defined.data_types_or_fields_to_field_list(inputs: collections.abc.Sequence[pyarrow.Field | pyarrow.DataType] | pyarrow.Field | pyarrow.DataType) list[pyarrow.Field]
+

Helper function to return a list of Fields.

+
+ +
+
+datafusion.user_defined._R
+
+ +
+
+datafusion.user_defined.udaf
+
+ +
+
+datafusion.user_defined.udf
+
+ +
+
+datafusion.user_defined.udtf
+
+ +
+
+datafusion.user_defined.udwf
+
+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/autoapi/index.html b/autoapi/index.html new file mode 100644 index 000000000..f209f86f6 --- /dev/null +++ b/autoapi/index.html @@ -0,0 +1,529 @@ + + + + + + + + API Reference — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ + + + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/benchmarks/db-benchmark/README.md b/benchmarks/db-benchmark/README.md deleted file mode 100644 index 8ce45344d..000000000 --- a/benchmarks/db-benchmark/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# DataFusion Implementation of db-benchmark - -This directory contains scripts for running [db-benchmark](https://github.com/duckdblabs/db-benchmark) with -DataFusion's Python bindings. - -## Directions - -Run the following from root of this project. - -```bash -docker build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.dockerfile . -docker run --privileged -it db-benchmark -``` diff --git a/benchmarks/db-benchmark/db-benchmark.dockerfile b/benchmarks/db-benchmark/db-benchmark.dockerfile deleted file mode 100644 index af2edd0f4..000000000 --- a/benchmarks/db-benchmark/db-benchmark.dockerfile +++ /dev/null @@ -1,120 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM ubuntu:22.04 -ARG DEBIAN_FRONTEND=noninteractive -ARG TARGETPLATFORM - -# This section is based on https://github.com/duckdblabs/db-benchmark/blob/master/_utils/repro.sh - -RUN apt-get -qq update -RUN apt-get -qq -y upgrade -RUN apt-get -qq install -y apt-utils - -RUN apt-get -qq install -y lsb-release software-properties-common wget curl vim htop git byobu libcurl4-openssl-dev libssl-dev -RUN apt-get -qq install -y libfreetype6-dev -RUN apt-get -qq install -y libfribidi-dev -RUN apt-get -qq install -y libharfbuzz-dev -RUN apt-get -qq install -y git -RUN apt-get -qq install -y libxml2-dev -RUN apt-get -qq install -y make -RUN apt-get -qq install -y libfontconfig1-dev -RUN apt-get -qq install -y libicu-dev pandoc zlib1g-dev libgit2-dev libcurl4-openssl-dev libssl-dev libjpeg-dev libpng-dev libtiff-dev -# apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 -RUN add-apt-repository "deb [arch=amd64,i386] https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/" - -RUN apt-get -qq install -y r-base-dev virtualenv - -RUN cd /usr/local/lib/R && \ - chmod o+w site-library - -RUN cd / && \ - git clone https://github.com/duckdblabs/db-benchmark.git - -WORKDIR /db-benchmark - -RUN mkdir -p .R && \ - echo 'CFLAGS=-O3 -mtune=native' >> .R/Makevars && \ - echo 'CXXFLAGS=-O3 -mtune=native' >> .R/Makevars - -RUN cd pydatatable && \ - virtualenv py-pydatatable --python=/usr/bin/python3.10 -RUN cd pandas && \ - virtualenv py-pandas --python=/usr/bin/python3.10 -RUN cd modin && \ - virtualenv py-modin --python=/usr/bin/python3.10 - -RUN Rscript -e 'install.packages(c("jsonlite","bit64","devtools","rmarkdown"), dependencies=TRUE, repos="https://cloud.r-project.org")' - -SHELL ["/bin/bash", "-c"] - -RUN source ./pandas/py-pandas/bin/activate && \ - python3 -m pip install --upgrade psutil && \ - python3 -m pip install --upgrade pandas && \ - deactivate - -RUN source ./modin/py-modin/bin/activate && \ - python3 -m pip 
install --upgrade modin && \ - deactivate - -RUN source ./pydatatable/py-pydatatable/bin/activate && \ - python3 -m pip install --upgrade git+https://github.com/h2oai/datatable && \ - deactivate - -## install dplyr -#RUN Rscript -e 'devtools::install_github(c("tidyverse/readr","tidyverse/dplyr"))' - -# install data.table -RUN Rscript -e 'install.packages("data.table", repos="https://rdatatable.gitlab.io/data.table/")' - -## generate data for groupby 0.5GB -RUN Rscript _data/groupby-datagen.R 1e7 1e2 0 0 -RUN #Rscript _data/groupby-datagen.R 1e8 1e2 0 0 -RUN #Rscript _data/groupby-datagen.R 1e9 1e2 0 0 - -RUN mkdir data && \ - mv G1_1e7_1e2_0_0.csv data/ - -# set only groupby task -RUN echo "Changing run.conf and _control/data.csv to run only groupby at 0.5GB" && \ - cp run.conf run.conf.original && \ - sed -i 's/groupby join groupby2014/groupby/g' run.conf && \ - sed -i 's/data.table dplyr pandas pydatatable spark dask clickhouse polars arrow duckdb/data.table dplyr duckdb/g' run.conf && \ - sed -i 's/DO_PUBLISH=true/DO_PUBLISH=false/g' run.conf - -## set sizes -RUN mv _control/data.csv _control/data.csv.original && \ - echo "task,data,nrow,k,na,sort,active" > _control/data.csv && \ - echo "groupby,G1_1e7_1e2_0_0,1e7,1e2,0,0,1" >> _control/data.csv - -RUN #./dplyr/setup-dplyr.sh -RUN #./datatable/setup-datatable.sh -RUN #./duckdb/setup-duckdb.sh - -# END OF SETUP - -RUN python3 -m pip install --upgrade pandas -RUN python3 -m pip install --upgrade polars psutil -RUN python3 -m pip install --upgrade datafusion - -# Now add our solution -RUN rm -rf datafusion-python 2>/dev/null && \ - mkdir datafusion-python -ADD benchmarks/db-benchmark/*.py datafusion-python/ -ADD benchmarks/db-benchmark/run-bench.sh . 
- -ENTRYPOINT [ "/db-benchmark/run-bench.sh" ] \ No newline at end of file diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py deleted file mode 100644 index 533166695..000000000 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ /dev/null @@ -1,527 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import gc -import os -import timeit -from pathlib import Path - -import datafusion as df -import pyarrow as pa -from datafusion import ( - RuntimeEnvBuilder, - SessionConfig, - SessionContext, - col, -) -from datafusion import ( - functions as f, -) -from pyarrow import csv as pacsv - -print("# groupby-datafusion.py", flush=True) - -exec(Path.open("./_helpers/helpers.py").read()) - - -def ans_shape(batches) -> tuple[int, int]: - rows, cols = 0, 0 - for batch in batches: - rows += batch.num_rows - if cols == 0: - cols = batch.num_columns - else: - assert cols == batch.num_columns - return rows, cols - - -def execute(df) -> list: - print(df.execution_plan().display_indent()) - return df.collect() - - -ver = df.__version__ -git = "" -task = "groupby" -solution = "datafusion" -fun = ".groupby" -cache = "TRUE" -on_disk = "FALSE" - -# experimental - support running with both DataFrame and SQL APIs -sql = True - -data_name = os.environ["SRC_DATANAME"] -src_grp = "data" / data_name / ".csv" -print("loading dataset %s" % src_grp, flush=True) - -schema = pa.schema( - [ - ("id4", pa.int32()), - ("id5", pa.int32()), - ("id6", pa.int32()), - ("v1", pa.int32()), - ("v2", pa.int32()), - ("v3", pa.float64()), - ] -) - -data = pacsv.read_csv( - src_grp, - convert_options=pacsv.ConvertOptions(auto_dict_encode=True, column_types=schema), -) -print("dataset loaded") - -# create a session context with explicit runtime and config settings -runtime = ( - RuntimeEnvBuilder() - .with_disk_manager_os() - .with_fair_spill_pool(64 * 1024 * 1024 * 1024) -) -config = ( - SessionConfig() - .with_repartition_joins(enabled=False) - .with_repartition_aggregations(enabled=False) - .set("datafusion.execution.coalesce_batches", "false") -) -ctx = SessionContext(config, runtime) -print(ctx) - -ctx.register_record_batches("x", [data.to_batches()]) -print("registered record batches") -# cols = ctx.sql("SHOW columns from x") -# ans.show() - -in_rows = data.num_rows -# print(in_rows, flush=True) - 
-task_init = timeit.default_timer() - -question = "sum v1 by id1" # q1 -gc.collect() -t_start = timeit.default_timer() -if sql: - df = ctx.sql("SELECT id1, SUM(v1) AS v1 FROM x GROUP BY id1") -else: - df = ctx.table("x").aggregate([f.col("id1")], [f.sum(f.col("v1")).alias("v1")]) -ans = execute(df) - -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q1: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "sum v1 by id1:id2" # q2 -gc.collect() -t_start = timeit.default_timer() -if sql: - df = ctx.sql("SELECT id1, id2, SUM(v1) AS v1 FROM x GROUP BY id1, id2") -else: - df = ctx.table("x").aggregate( - [f.col("id1"), f.col("id2")], [f.sum(f.col("v1")).alias("v1")] - ) -ans = execute(df) -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q2: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "sum v1 mean v3 by id3" # q3 -gc.collect() -t_start = timeit.default_timer() -if sql: - df = ctx.sql("SELECT id3, SUM(v1) AS v1, 
AVG(v3) AS v3 FROM x GROUP BY id3") -else: - df = ctx.table("x").aggregate( - [f.col("id3")], - [ - f.sum(f.col("v1")).alias("v1"), - f.avg(f.col("v3")).alias("v3"), - ], - ) -ans = execute(df) -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q3: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = ( - df.aggregate([], [f.sum(col("v1")), f.sum(col("v3"))]) - .collect()[0] - .to_pandas() - .to_numpy()[0] -) -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "mean v1:v3 by id4" # q4 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id4, AVG(v1) AS v1, AVG(v2) AS v2, AVG(v3) AS v3 FROM x GROUP BY id4" -).collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q4: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = ( - df.aggregate([], [f.sum(col("v1")), f.sum(col("v2")), f.sum(col("v3"))]) - .collect()[0] - .to_pandas() - .to_numpy()[0] -) -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "sum v1:v3 by id6" # q5 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id6, SUM(v1) AS v1, SUM(v2) AS v2, SUM(v3) AS v3 FROM x GROUP BY id6" -).collect() -shape = ans_shape(ans) -print(shape, 
flush=True) -t = timeit.default_timer() - t_start -print(f"q5: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = ( - df.aggregate([], [f.sum(col("v1")), f.sum(col("v2")), f.sum(col("v3"))]) - .collect()[0] - .to_pandas() - .to_numpy()[0] -) -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "median v3 sd v3 by id4 id5" # q6 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id4, id5, approx_percentile_cont(v3, .5) AS median_v3, stddev(v3) AS stddev_v3 FROM x GROUP BY id4, id5" -).collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q6: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = ( - df.aggregate([], [f.sum(col("median_v3")), f.sum(col("stddev_v3"))]) - .collect()[0] - .to_pandas() - .to_numpy()[0] -) -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "max v1 - min v2 by id3" # q7 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id3, MAX(v1) - MIN(v2) AS range_v1_v2 FROM x GROUP BY id3" -).collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q7: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], 
[f.sum(col("range_v1_v2"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "largest two v3 by id6" # q8 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id6, v3 from (SELECT id6, v3, row_number() OVER (PARTITION BY id6 ORDER BY v3 DESC) AS row FROM x) t WHERE row <= 2" -).collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q8: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v3"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "regression v1 v2 by id2 id4" # q9 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql("SELECT corr(v1, v2) as corr FROM x GROUP BY id2, id4").collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q9: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("corr"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - 
cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "sum v3 count by id1:id6" # q10 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT id1, id2, id3, id4, id5, id6, SUM(v3) as v3, COUNT(*) AS cnt FROM x GROUP BY id1, id2, id3, id4, id5, id6" -).collect() -shape = ans_shape(ans) -print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q10: {t}") -m = memory_usage() -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = ( - df.aggregate([], [f.sum(col("v3")), f.sum(col("cnt"))]) - .collect()[0] - .to_pandas() - .to_numpy()[0] -) -chkt = timeit.default_timer() - t_start -write_log( - task=task, - data=data_name, - in_rows=in_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -print( - "grouping finished, took %0.fs" % (timeit.default_timer() - task_init), - flush=True, -) - -exit(0) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py deleted file mode 100755 index 3be296c81..000000000 --- a/benchmarks/db-benchmark/join-datafusion.py +++ /dev/null @@ -1,299 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import gc -import os -import timeit -from pathlib import Path - -import datafusion as df -from datafusion import col -from datafusion import functions as f -from pyarrow import csv as pacsv - -print("# join-datafusion.py", flush=True) - -exec(Path.open("./_helpers/helpers.py").read()) - - -def ans_shape(batches) -> tuple[int, int]: - rows, cols = 0, 0 - for batch in batches: - rows += batch.num_rows - if cols == 0: - cols = batch.num_columns - else: - assert cols == batch.num_columns - return rows, cols - - -ver = df.__version__ -task = "join" -git = "" -solution = "datafusion" -fun = ".join" -cache = "TRUE" -on_disk = "FALSE" - -data_name = os.environ["SRC_DATANAME"] -src_jn_x = "data" / data_name / ".csv" -y_data_name = join_to_tbls(data_name) -src_jn_y = [ - "data" / y_data_name[0] / ".csv", - "data" / y_data_name[1] / ".csv", - "data" / y_data_name[2] / ".csv", -] -if len(src_jn_y) != 3: - error_msg = "Something went wrong in preparing files used for join" - raise Exception(error_msg) - -print( - "loading datasets " - + data_name - + ", " - + y_data_name[0] - + ", " - + y_data_name[1] - + ", " - + y_data_name[2], - flush=True, -) - -ctx = df.SessionContext() -print(ctx) - -# TODO we should be applying projections to these table reads to create relations -# of different sizes - -x_data = pacsv.read_csv( - src_jn_x, convert_options=pacsv.ConvertOptions(auto_dict_encode=True) -) -ctx.register_record_batches("x", [x_data.to_batches()]) -small_data = pacsv.read_csv( - src_jn_y[0], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) -) 
-ctx.register_record_batches("small", [small_data.to_batches()]) -medium_data = pacsv.read_csv( - src_jn_y[1], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) -) -ctx.register_record_batches("medium", [medium_data.to_batches()]) -large_data = pacsv.read_csv( - src_jn_y[2], convert_options=pacsv.ConvertOptions(auto_dict_encode=True) -) -ctx.register_record_batches("large", [large_data.to_batches()]) - -print(x_data.num_rows, flush=True) -print(small_data.num_rows, flush=True) -print(medium_data.num_rows, flush=True) -print(large_data.num_rows, flush=True) - -task_init = timeit.default_timer() -print("joining...", flush=True) - -question = "small inner on int" # q1 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT x.id1, x.id2, x.id3, x.id4 as xid4, small.id4 as smallid4, x.id5, x.id6, x.v1, small.v2 FROM x INNER JOIN small ON x.id1 = small.id1" -).collect() -# ans = ctx.sql("SELECT * FROM x INNER JOIN small ON x.id1 = small.id1").collect() -# print(set([b.schema for b in ans])) -shape = ans_shape(ans) -# print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q1: {t}") -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -m = memory_usage() -write_log( - task=task, - data=data_name, - in_rows=x_data.num_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "medium inner on int" # q2 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x INNER JOIN medium ON x.id2 = medium.id2" -).collect() 
-shape = ans_shape(ans) -# print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q2: {t}") -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -m = memory_usage() -write_log( - task=task, - data=data_name, - in_rows=x_data.num_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "medium outer on int" # q3 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x LEFT JOIN medium ON x.id2 = medium.id2" -).collect() -shape = ans_shape(ans) -# print(shape, flush=True) -t = timeit.default_timer() - t_start -print(f"q3: {t}") -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -m = memory_usage() -write_log( - task=task, - data=data_name, - in_rows=x_data.num_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "medium inner on factor" # q4 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT x.id1 as xid1, medium.id1 as mediumid1, x.id2, x.id3, x.id4 as xid4, medium.id4 as mediumid4, x.id5 as xid5, medium.id5 as mediumid5, x.id6, x.v1, medium.v2 FROM x LEFT JOIN medium ON x.id5 = medium.id5" -).collect() -shape = 
ans_shape(ans) -# print(shape) -t = timeit.default_timer() - t_start -print(f"q4: {t}") -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -m = memory_usage() -write_log( - task=task, - data=data_name, - in_rows=x_data.num_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -question = "big inner on int" # q5 -gc.collect() -t_start = timeit.default_timer() -ans = ctx.sql( - "SELECT x.id1 as xid1, large.id1 as largeid1, x.id2 as xid2, large.id2 as largeid2, x.id3, x.id4 as xid4, large.id4 as largeid4, x.id5 as xid5, large.id5 as largeid5, x.id6 as xid6, large.id6 as largeid6, x.v1, large.v2 FROM x LEFT JOIN large ON x.id3 = large.id3" -).collect() -shape = ans_shape(ans) -# print(shape) -t = timeit.default_timer() - t_start -print(f"q5: {t}") -t_start = timeit.default_timer() -df = ctx.create_dataframe([ans]) -chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0] -chkt = timeit.default_timer() - t_start -m = memory_usage() -write_log( - task=task, - data=data_name, - in_rows=x_data.num_rows, - question=question, - out_rows=shape[0], - out_cols=shape[1], - solution=solution, - version=ver, - git=git, - fun=fun, - run=1, - time_sec=t, - mem_gb=m, - cache=cache, - chk=make_chk([chk]), - chk_time_sec=chkt, - on_disk=on_disk, -) -del ans -gc.collect() - -print( - "joining finished, took %0.fs" % (timeit.default_timer() - task_init), - flush=True, -) - -exit(0) diff --git a/benchmarks/db-benchmark/run-bench.sh b/benchmarks/db-benchmark/run-bench.sh deleted file mode 100755 index 36a6087d9..000000000 --- a/benchmarks/db-benchmark/run-bench.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash 
-# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -set -e - -#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/polars/groupby-polars.py -SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/datafusion-python/groupby-datafusion.py - -# joins need more work still -#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/datafusion-python/join-datafusion.py -#SRC_DATANAME=G1_1e7_1e2_0_0 python3 /db-benchmark/polars/join-polars.py - -cat time.csv diff --git a/benchmarks/max_cpu_usage.py b/benchmarks/max_cpu_usage.py deleted file mode 100644 index ae73baad6..000000000 --- a/benchmarks/max_cpu_usage.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Benchmark script showing how to maximize CPU usage. - -This script demonstrates one example of tuning DataFusion for improved parallelism -and CPU utilization. It uses synthetic in-memory data and performs simple aggregation -operations to showcase the impact of partitioning configuration. - -IMPORTANT: This is a simplified example designed to illustrate partitioning concepts. -Actual performance in your applications may vary significantly based on many factors: - -- Type of table providers (Parquet files, CSV, databases, etc.) -- I/O operations and storage characteristics (local disk, network, cloud storage) -- Query complexity and operation types (joins, window functions, complex expressions) -- Data distribution and size characteristics -- Memory available and hardware specifications -- Network latency for distributed data sources - -It is strongly recommended that you create similar benchmarks tailored to your specific: -- Hardware configuration -- Data sources and formats -- Typical query patterns and workloads -- Performance requirements - -This will give you more accurate insights into how DataFusion configuration options -will affect your particular use case. -""" - -from __future__ import annotations - -import argparse -import multiprocessing -import time - -import pyarrow as pa -from datafusion import SessionConfig, SessionContext, col -from datafusion import functions as f - - -def main(num_rows: int, partitions: int) -> None: - """Run a simple aggregation after repartitioning. 
- - This function demonstrates basic partitioning concepts using synthetic data. - Real-world performance will depend on your specific data sources, query types, - and system configuration. - """ - # Create some example data (synthetic in-memory data for demonstration) - # Note: Real applications typically work with files, databases, or other - # data sources that have different I/O and distribution characteristics - array = pa.array(range(num_rows)) - batch = pa.record_batch([array], names=["a"]) - - # Configure the session to use a higher target partition count and - # enable automatic repartitioning. - config = ( - SessionConfig() - .with_target_partitions(partitions) - .with_repartition_joins(enabled=True) - .with_repartition_aggregations(enabled=True) - .with_repartition_windows(enabled=True) - ) - ctx = SessionContext(config) - - # Register the input data and repartition manually to ensure that all - # partitions are used. - df = ctx.create_dataframe([[batch]]).repartition(partitions) - - start = time.time() - df = df.aggregate([], [f.sum(col("a"))]) - df.collect() - end = time.time() - - print( - f"Processed {num_rows} rows using {partitions} partitions in {end - start:.3f}s" - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--rows", - type=int, - default=1_000_000, - help="Number of rows in the generated dataset", - ) - parser.add_argument( - "--partitions", - type=int, - default=multiprocessing.cpu_count(), - help="Target number of partitions to use", - ) - args = parser.parse_args() - main(args.rows, args.partitions) diff --git a/benchmarks/tpch/.gitignore b/benchmarks/tpch/.gitignore deleted file mode 100644 index 4471c6d15..000000000 --- a/benchmarks/tpch/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -data -results.csv \ No newline at end of file diff --git a/benchmarks/tpch/README.md b/benchmarks/tpch/README.md deleted file mode 100644 index a118a7449..000000000 --- 
a/benchmarks/tpch/README.md +++ /dev/null @@ -1,78 +0,0 @@ - - -# DataFusion Python Benchmarks Derived from TPC-H - -## Create Release Build - -From repo root: - -```bash -maturin develop --release -``` - -Note that release builds take a really long time, so you may want to temporarily comment out this section of the -root Cargo.toml when frequently building. - -```toml -[profile.release] -lto = true -codegen-units = 1 -``` - -## Generate Data - -```bash -./tpch-gen.sh 1 -``` - -## Run Benchmarks - -```bash -python tpch.py ./data ./queries -``` - -A summary of the benchmark timings will be written to `results.csv`. For example: - -```csv -setup,1.4 -q1,2978.6 -q2,679.7 -q3,2943.7 -q4,2894.9 -q5,3592.3 -q6,1691.4 -q7,3003.9 -q8,3818.7 -q9,4237.9 -q10,2344.7 -q11,526.1 -q12,2284.6 -q13,1009.2 -q14,1738.4 -q15,1942.1 -q16,499.8 -q17,5178.9 -q18,4127.7 -q19,2056.6 -q20,2162.5 -q21,8046.5 -q22,754.9 -total,58513.2 -``` \ No newline at end of file diff --git a/benchmarks/tpch/create_tables.sql b/benchmarks/tpch/create_tables.sql deleted file mode 100644 index 9f3aeea20..000000000 --- a/benchmarks/tpch/create_tables.sql +++ /dev/null @@ -1,143 +0,0 @@ --- Schema derived from TPC-H schema under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
- -CREATE EXTERNAL TABLE customer ( - c_custkey INT NOT NULL, - c_name VARCHAR NOT NULL, - c_address VARCHAR NOT NULL, - c_nationkey INT NOT NULL, - c_phone VARCHAR NOT NULL, - c_acctbal DECIMAL(15, 2) NOT NULL, - c_mktsegment VARCHAR NOT NULL, - c_comment VARCHAR NOT NULL, - c_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/customer.csv'; - -CREATE EXTERNAL TABLE lineitem ( - l_orderkey INT NOT NULL, - l_partkey INT NOT NULL, - l_suppkey INT NOT NULL, - l_linenumber INT NOT NULL, - l_quantity DECIMAL(15, 2) NOT NULL, - l_extendedprice DECIMAL(15, 2) NOT NULL, - l_discount DECIMAL(15, 2) NOT NULL, - l_tax DECIMAL(15, 2) NOT NULL, - l_returnflag VARCHAR NOT NULL, - l_linestatus VARCHAR NOT NULL, - l_shipdate DATE NOT NULL, - l_commitdate DATE NOT NULL, - l_receiptdate DATE NOT NULL, - l_shipinstruct VARCHAR NOT NULL, - l_shipmode VARCHAR NOT NULL, - l_comment VARCHAR NOT NULL, - l_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/lineitem.csv'; - -CREATE EXTERNAL TABLE nation ( - n_nationkey INT NOT NULL, - n_name VARCHAR NOT NULL, - n_regionkey INT NOT NULL, - n_comment VARCHAR NOT NULL, - n_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/nation.csv'; - -CREATE EXTERNAL TABLE orders ( - o_orderkey INT NOT NULL, - o_custkey INT NOT NULL, - o_orderstatus VARCHAR NOT NULL, - o_totalprice DECIMAL(15, 2) NOT NULL, - o_orderdate DATE NOT NULL, - o_orderpriority VARCHAR NOT NULL, - o_clerk VARCHAR NOT NULL, - o_shippriority INT NULL, - o_comment VARCHAR NOT NULL, - o_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/orders.csv'; - -CREATE EXTERNAL TABLE part ( - p_partkey INT NOT NULL, - p_name VARCHAR NOT NULL, - p_mfgr VARCHAR NOT NULL, - p_brand VARCHAR NOT NULL, - p_type VARCHAR 
NOT NULL, - p_size INT NULL, - p_container VARCHAR NOT NULL, - p_retailprice DECIMAL(15, 2) NOT NULL, - p_comment VARCHAR NOT NULL, - p_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/part.csv'; - -CREATE EXTERNAL TABLE partsupp ( - ps_partkey INT NOT NULL, - ps_suppkey INT NOT NULL, - ps_availqty INT NOT NULL, - ps_supplycost DECIMAL(15, 2) NOT NULL, - ps_comment VARCHAR NOT NULL, - ps_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/partsupp.csv'; - -CREATE EXTERNAL TABLE region ( - r_regionkey INT NOT NULL, - r_name VARCHAR NOT NULL, - r_comment VARCHAR NOT NULL, - r_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/region.csv'; - -CREATE EXTERNAL TABLE supplier ( - s_suppkey INT NOT NULL, - s_name VARCHAR NOT NULL, - s_address VARCHAR NOT NULL, - s_nationkey INT NOT NULL, - s_phone VARCHAR NOT NULL, - s_acctbal DECIMAL(15, 2) NOT NULL, - s_comment VARCHAR NOT NULL, - s_extra VARCHAR NOT NULL, -) -STORED AS CSV -OPTIONS ( - format.delimiter '|', - format.has_header true -) -LOCATION '$PATH/supplier.csv'; \ No newline at end of file diff --git a/benchmarks/tpch/queries/q1.sql b/benchmarks/tpch/queries/q1.sql deleted file mode 100644 index e7e8e32b8..000000000 --- a/benchmarks/tpch/queries/q1.sql +++ /dev/null @@ -1,23 +0,0 @@ --- Benchmark Query 1 derived from TPC-H query 1 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -from - lineitem -where - l_shipdate <= date '1998-12-01' - interval '68 days' -group by - l_returnflag, - l_linestatus -order by - l_returnflag, - l_linestatus; diff --git a/benchmarks/tpch/queries/q10.sql b/benchmarks/tpch/queries/q10.sql deleted file mode 100644 index 8391f6277..000000000 --- a/benchmarks/tpch/queries/q10.sql +++ /dev/null @@ -1,33 +0,0 @@ --- Benchmark Query 10 derived from TPC-H query 10 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - c_custkey, - c_name, - sum(l_extendedprice * (1 - l_discount)) as revenue, - c_acctbal, - n_name, - c_address, - c_phone, - c_comment -from - customer, - orders, - lineitem, - nation -where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and l_returnflag = 'R' - and c_nationkey = n_nationkey -group by - c_custkey, - c_name, - c_acctbal, - c_phone, - n_name, - c_address, - c_comment -order by - revenue desc limit 20; diff --git a/benchmarks/tpch/queries/q11.sql b/benchmarks/tpch/queries/q11.sql deleted file mode 100644 index 58776d369..000000000 --- a/benchmarks/tpch/queries/q11.sql +++ /dev/null @@ -1,29 +0,0 @@ --- Benchmark Query 11 derived from TPC-H query 11 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - ps_partkey, - sum(ps_supplycost * ps_availqty) as value -from - partsupp, - supplier, - nation -where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'ALGERIA' -group by - ps_partkey having - sum(ps_supplycost * ps_availqty) > ( - select - sum(ps_supplycost * ps_availqty) * 0.0001000000 - from - partsupp, - supplier, - nation - where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'ALGERIA' - ) -order by - value desc; diff --git a/benchmarks/tpch/queries/q12.sql b/benchmarks/tpch/queries/q12.sql deleted file mode 100644 index 0b973de98..000000000 --- a/benchmarks/tpch/queries/q12.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Benchmark Query 12 derived from TPC-H query 12 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - l_shipmode, - sum(case - when o_orderpriority = '1-URGENT' - or o_orderpriority = '2-HIGH' - then 1 - else 0 - end) as high_line_count, - sum(case - when o_orderpriority <> '1-URGENT' - and o_orderpriority <> '2-HIGH' - then 1 - else 0 - end) as low_line_count -from - orders, - lineitem -where - o_orderkey = l_orderkey - and l_shipmode in ('FOB', 'SHIP') - and l_commitdate < l_receiptdate - and l_shipdate < l_commitdate - and l_receiptdate >= date '1995-01-01' - and l_receiptdate < date '1995-01-01' + interval '1' year -group by - l_shipmode -order by - l_shipmode; diff --git a/benchmarks/tpch/queries/q13.sql b/benchmarks/tpch/queries/q13.sql deleted file mode 100644 index 145dd6f10..000000000 --- a/benchmarks/tpch/queries/q13.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Benchmark Query 13 derived from TPC-H query 13 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) - from - customer left outer join orders on - c_custkey = o_custkey - and o_comment not like '%express%requests%' - group by - c_custkey - ) as c_orders (c_custkey, c_count) -group by - c_count -order by - custdist desc, - c_count desc; diff --git a/benchmarks/tpch/queries/q14.sql b/benchmarks/tpch/queries/q14.sql deleted file mode 100644 index 1a91a04df..000000000 --- a/benchmarks/tpch/queries/q14.sql +++ /dev/null @@ -1,15 +0,0 @@ --- Benchmark Query 14 derived from TPC-H query 14 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - 100.00 * sum(case - when p_type like 'PROMO%' - then l_extendedprice * (1 - l_discount) - else 0 - end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue -from - lineitem, - part -where - l_partkey = p_partkey - and l_shipdate >= date '1995-02-01' - and l_shipdate < date '1995-02-01' + interval '1' month; diff --git a/benchmarks/tpch/queries/q15.sql b/benchmarks/tpch/queries/q15.sql deleted file mode 100644 index 68cc32cb7..000000000 --- a/benchmarks/tpch/queries/q15.sql +++ /dev/null @@ -1,33 +0,0 @@ --- Benchmark Query 15 derived from TPC-H query 15 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-create view revenue0 (supplier_no, total_revenue) as - select - l_suppkey, - sum(l_extendedprice * (1 - l_discount)) - from - lineitem - where - l_shipdate >= date '1996-08-01' - and l_shipdate < date '1996-08-01' + interval '3' month - group by - l_suppkey; -select - s_suppkey, - s_name, - s_address, - s_phone, - total_revenue -from - supplier, - revenue0 -where - s_suppkey = supplier_no - and total_revenue = ( - select - max(total_revenue) - from - revenue0 - ) -order by - s_suppkey; -drop view revenue0; diff --git a/benchmarks/tpch/queries/q16.sql b/benchmarks/tpch/queries/q16.sql deleted file mode 100644 index 098b4f3b3..000000000 --- a/benchmarks/tpch/queries/q16.sql +++ /dev/null @@ -1,32 +0,0 @@ --- Benchmark Query 16 derived from TPC-H query 16 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp, - part -where - p_partkey = ps_partkey - and p_brand <> 'Brand#14' - and p_type not like 'SMALL PLATED%' - and p_size in (14, 6, 5, 31, 49, 15, 41, 47) - and ps_suppkey not in ( - select - s_suppkey - from - supplier - where - s_comment like '%Customer%Complaints%' - ) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size; diff --git a/benchmarks/tpch/queries/q17.sql b/benchmarks/tpch/queries/q17.sql deleted file mode 100644 index ed02d7b77..000000000 --- a/benchmarks/tpch/queries/q17.sql +++ /dev/null @@ -1,19 +0,0 @@ --- Benchmark Query 17 derived from TPC-H query 17 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - sum(l_extendedprice) / 7.0 as avg_yearly -from - lineitem, - part -where - p_partkey = l_partkey - and p_brand = 'Brand#42' - and p_container = 'LG BAG' - and l_quantity < ( - select - 0.2 * avg(l_quantity) - from - lineitem - where - l_partkey = p_partkey - ); diff --git a/benchmarks/tpch/queries/q18.sql b/benchmarks/tpch/queries/q18.sql deleted file mode 100644 index cf1f8c89a..000000000 --- a/benchmarks/tpch/queries/q18.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Benchmark Query 18 derived from TPC-H query 18 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice, - sum(l_quantity) -from - customer, - orders, - lineitem -where - o_orderkey in ( - select - l_orderkey - from - lineitem - group by - l_orderkey having - sum(l_quantity) > 313 - ) - and c_custkey = o_custkey - and o_orderkey = l_orderkey -group by - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice -order by - o_totalprice desc, - o_orderdate limit 100; diff --git a/benchmarks/tpch/queries/q19.sql b/benchmarks/tpch/queries/q19.sql deleted file mode 100644 index 3968f0d24..000000000 --- a/benchmarks/tpch/queries/q19.sql +++ /dev/null @@ -1,37 +0,0 @@ --- Benchmark Query 19 derived from TPC-H query 19 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - sum(l_extendedprice* (1 - l_discount)) as revenue -from - lineitem, - part -where - ( - p_partkey = l_partkey - and p_brand = 'Brand#21' - and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - and l_quantity >= 8 and l_quantity <= 8 + 10 - and p_size between 1 and 5 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#13' - and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - and l_quantity >= 20 and l_quantity <= 20 + 10 - and p_size between 1 and 10 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#52' - and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - and l_quantity >= 30 and l_quantity <= 30 + 10 - and p_size between 1 and 15 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ); diff --git a/benchmarks/tpch/queries/q2.sql b/benchmarks/tpch/queries/q2.sql deleted file mode 100644 index 46ec5d239..000000000 --- a/benchmarks/tpch/queries/q2.sql +++ /dev/null @@ -1,45 +0,0 @@ --- Benchmark Query 2 derived from TPC-H query 2 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 48 - and p_type like '%TIN' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'ASIA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'ASIA' - ) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey limit 100; diff --git a/benchmarks/tpch/queries/q20.sql b/benchmarks/tpch/queries/q20.sql deleted file mode 100644 index 5bb16563b..000000000 --- a/benchmarks/tpch/queries/q20.sql +++ /dev/null @@ -1,39 +0,0 @@ --- Benchmark Query 20 derived from TPC-H query 20 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'blanched%' - ) - and ps_availqty > ( - select - 0.5 * sum(l_quantity) - from - lineitem - where - l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= date '1993-01-01' - and l_shipdate < date '1993-01-01' + interval '1' year - ) - ) - and s_nationkey = n_nationkey - and n_name = 'KENYA' -order by - s_name; diff --git a/benchmarks/tpch/queries/q21.sql b/benchmarks/tpch/queries/q21.sql deleted file mode 100644 index 6f84b876e..000000000 --- a/benchmarks/tpch/queries/q21.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Benchmark Query 21 derived from TPC-H query 21 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - s_name, - count(*) as numwait -from - supplier, - lineitem l1, - orders, - nation -where - s_suppkey = l1.l_suppkey - and o_orderkey = l1.l_orderkey - and o_orderstatus = 'F' - and l1.l_receiptdate > l1.l_commitdate - and exists ( - select - * - from - lineitem l2 - where - l2.l_orderkey = l1.l_orderkey - and l2.l_suppkey <> l1.l_suppkey - ) - and not exists ( - select - * - from - lineitem l3 - where - l3.l_orderkey = l1.l_orderkey - and l3.l_suppkey <> l1.l_suppkey - and l3.l_receiptdate > l3.l_commitdate - ) - and s_nationkey = n_nationkey - and n_name = 'ARGENTINA' -group by - s_name -order by - numwait desc, - s_name limit 100; diff --git a/benchmarks/tpch/queries/q22.sql b/benchmarks/tpch/queries/q22.sql deleted file mode 100644 index 65ea49b04..000000000 --- a/benchmarks/tpch/queries/q22.sql +++ /dev/null @@ -1,39 +0,0 @@ --- Benchmark Query 22 derived from TPC-H query 22 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - cntrycode, - count(*) as numcust, - sum(c_acctbal) as totacctbal -from - ( - select - substring(c_phone from 1 for 2) as cntrycode, - c_acctbal - from - customer - where - substring(c_phone from 1 for 2) in - ('24', '34', '16', '30', '33', '14', '13') - and c_acctbal > ( - select - avg(c_acctbal) - from - customer - where - c_acctbal > 0.00 - and substring(c_phone from 1 for 2) in - ('24', '34', '16', '30', '33', '14', '13') - ) - and not exists ( - select - * - from - orders - where - o_custkey = c_custkey - ) - ) as custsale -group by - cntrycode -order by - cntrycode; diff --git a/benchmarks/tpch/queries/q3.sql b/benchmarks/tpch/queries/q3.sql deleted file mode 100644 index 161f2e1e4..000000000 --- a/benchmarks/tpch/queries/q3.sql +++ /dev/null @@ -1,24 +0,0 @@ --- Benchmark Query 3 derived from TPC-H query 3 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - l_orderkey, - sum(l_extendedprice * (1 - l_discount)) as revenue, - o_orderdate, - o_shippriority -from - customer, - orders, - lineitem -where - c_mktsegment = 'BUILDING' - and c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate < date '1995-03-15' - and l_shipdate > date '1995-03-15' -group by - l_orderkey, - o_orderdate, - o_shippriority -order by - revenue desc, - o_orderdate limit 10; diff --git a/benchmarks/tpch/queries/q4.sql b/benchmarks/tpch/queries/q4.sql deleted file mode 100644 index e444dbfce..000000000 --- a/benchmarks/tpch/queries/q4.sql +++ /dev/null @@ -1,23 +0,0 @@ --- Benchmark Query 4 derived from TPC-H query 4 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1995-04-01' - and o_orderdate < date '1995-04-01' + interval '3' month - and exists ( - select - * - from - lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority; diff --git a/benchmarks/tpch/queries/q5.sql b/benchmarks/tpch/queries/q5.sql deleted file mode 100644 index 4426bd245..000000000 --- a/benchmarks/tpch/queries/q5.sql +++ /dev/null @@ -1,26 +0,0 @@ --- Benchmark Query 5 derived from TPC-H query 5 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - n_name, - sum(l_extendedprice * (1 - l_discount)) as revenue -from - customer, - orders, - lineitem, - supplier, - nation, - region -where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and l_suppkey = s_suppkey - and c_nationkey = s_nationkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AFRICA' - and o_orderdate >= date '1994-01-01' - and o_orderdate < date '1994-01-01' + interval '1' year -group by - n_name -order by - revenue desc; diff --git a/benchmarks/tpch/queries/q6.sql b/benchmarks/tpch/queries/q6.sql deleted file mode 100644 index 3d6e51cfe..000000000 --- a/benchmarks/tpch/queries/q6.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Benchmark Query 6 derived from TPC-H query 6 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. -select - sum(l_extendedprice * l_discount) as revenue -from - lineitem -where - l_shipdate >= date '1994-01-01' - and l_shipdate < date '1994-01-01' + interval '1' year - and l_discount between 0.04 - 0.01 and 0.04 + 0.01 - and l_quantity < 24; diff --git a/benchmarks/tpch/queries/q7.sql b/benchmarks/tpch/queries/q7.sql deleted file mode 100644 index 6e36ad616..000000000 --- a/benchmarks/tpch/queries/q7.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Benchmark Query 7 derived from TPC-H query 7 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - supp_nation, - cust_nation, - l_year, - sum(volume) as revenue -from - ( - select - n1.n_name as supp_nation, - n2.n_name as cust_nation, - extract(year from l_shipdate) as l_year, - l_extendedprice * (1 - l_discount) as volume - from - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2 - where - s_suppkey = l_suppkey - and o_orderkey = l_orderkey - and c_custkey = o_custkey - and s_nationkey = n1.n_nationkey - and c_nationkey = n2.n_nationkey - and ( - (n1.n_name = 'GERMANY' and n2.n_name = 'IRAQ') - or (n1.n_name = 'IRAQ' and n2.n_name = 'GERMANY') - ) - and l_shipdate between date '1995-01-01' and date '1996-12-31' - ) as shipping -group by - supp_nation, - cust_nation, - l_year -order by - supp_nation, - cust_nation, - l_year; diff --git a/benchmarks/tpch/queries/q8.sql b/benchmarks/tpch/queries/q8.sql deleted file mode 100644 index e28235ed4..000000000 --- a/benchmarks/tpch/queries/q8.sql +++ /dev/null @@ -1,39 +0,0 @@ --- Benchmark Query 8 derived from TPC-H query 8 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - o_year, - sum(case - when nation = 'IRAQ' then volume - else 0 - end) / sum(volume) as mkt_share -from - ( - select - extract(year from o_orderdate) as o_year, - l_extendedprice * (1 - l_discount) as volume, - n2.n_name as nation - from - part, - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2, - region - where - p_partkey = l_partkey - and s_suppkey = l_suppkey - and l_orderkey = o_orderkey - and o_custkey = c_custkey - and c_nationkey = n1.n_nationkey - and n1.n_regionkey = r_regionkey - and r_name = 'MIDDLE EAST' - and s_nationkey = n2.n_nationkey - and o_orderdate between date '1995-01-01' and date '1996-12-31' - and p_type = 'LARGE PLATED STEEL' - ) as all_nations -group by - o_year -order by - o_year; diff --git a/benchmarks/tpch/queries/q9.sql b/benchmarks/tpch/queries/q9.sql deleted file mode 100644 index 86ae02482..000000000 --- a/benchmarks/tpch/queries/q9.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Benchmark Query 9 derived from TPC-H query 9 under the terms of the TPC Fair Use Policy. --- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
-select - nation, - o_year, - sum(amount) as sum_profit -from - ( - select - n_name as nation, - extract(year from o_orderdate) as o_year, - l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount - from - part, - supplier, - lineitem, - partsupp, - orders, - nation - where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%moccasin%' - ) as profit -group by - nation, - o_year -order by - nation, - o_year desc; diff --git a/benchmarks/tpch/tpch-gen.sh b/benchmarks/tpch/tpch-gen.sh deleted file mode 100755 index 139c300a2..000000000 --- a/benchmarks/tpch/tpch-gen.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -mkdir -p data/answers 2>/dev/null - -set -e - -# If RUN_IN_CI is set, then do not produce verbose output or use an interactive terminal -if [[ -z "${RUN_IN_CI}" ]]; then - TERMINAL_FLAG="-it" - VERBOSE_OUTPUT="-vf" -else - TERMINAL_FLAG="" - VERBOSE_OUTPUT="-f" -fi - -#pushd .. -#. 
./dev/build-set-env.sh -#popd - -# Generate data into the ./data directory if it does not already exist -FILE=./data/supplier.tbl -if test -f "$FILE"; then - echo "$FILE exists." -else - docker run -v `pwd`/data:/data $TERMINAL_FLAG --rm ghcr.io/scalytics/tpch-docker:main $VERBOSE_OUTPUT -s $1 - - # workaround for https://github.com/apache/arrow-datafusion/issues/6147 - mv data/customer.tbl data/customer.csv - mv data/lineitem.tbl data/lineitem.csv - mv data/nation.tbl data/nation.csv - mv data/orders.tbl data/orders.csv - mv data/part.tbl data/part.csv - mv data/partsupp.tbl data/partsupp.csv - mv data/region.tbl data/region.csv - mv data/supplier.tbl data/supplier.csv - - ls -l data -fi - -# Copy expected answers (at SF=1) into the ./data/answers directory if it does not already exist -FILE=./data/answers/q1.out -if test -f "$FILE"; then - echo "$FILE exists." -else - docker run -v `pwd`/data:/data $TERMINAL_FLAG --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/" -fi diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py deleted file mode 100644 index ffee5554c..000000000 --- a/benchmarks/tpch/tpch.py +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -import argparse -import time -from pathlib import Path - -from datafusion import SessionContext - - -def bench(data_path, query_path) -> None: - with Path("results.csv").open("w") as results: - # register tables - start = time.time() - total_time_millis = 0 - - # create context - # runtime = ( - # RuntimeEnvBuilder() - # .with_disk_manager_os() - # .with_fair_spill_pool(10000000) - # ) - # config = ( - # SessionConfig() - # .with_create_default_catalog_and_schema(True) - # .with_default_catalog_and_schema("datafusion", "tpch") - # .with_information_schema(True) - # ) - # ctx = SessionContext(config, runtime) - - ctx = SessionContext() - print("Configuration:\n", ctx) - - # register tables - with Path("create_tables.sql").open() as f: - sql = "" - for line in f.readlines(): - if line.startswith("--"): - continue - sql = sql + line - if sql.strip().endswith(";"): - sql = sql.strip().replace("$PATH", data_path) - ctx.sql(sql) - sql = "" - - end = time.time() - time_millis = (end - start) * 1000 - total_time_millis += time_millis - print(f"setup,{round(time_millis, 1)}") - results.write(f"setup,{round(time_millis, 1)}\n") - results.flush() - - # run queries - for query in range(1, 23): - with Path(f"{query_path}/q{query}.sql").open() as f: - text = f.read() - tmp = text.split(";") - queries = [s.strip() for s in tmp if len(s.strip()) > 0] - - try: - start = time.time() - for sql in queries: - print(sql) - df = ctx.sql(sql) - # result_set = df.collect() - df.show() - end = time.time() - time_millis = (end - start) * 1000 - total_time_millis += time_millis - print(f"q{query},{round(time_millis, 1)}") - results.write(f"q{query},{round(time_millis, 1)}\n") - results.flush() - except Exception as e: - print("query", query, "failed", e) - - print(f"total,{round(total_time_millis, 1)}") - results.write(f"total,{round(total_time_millis, 1)}\n") - - -if __name__ == 
"__main__": - parser = argparse.ArgumentParser() - parser.add_argument("data_path") - parser.add_argument("query_path") - args = parser.parse_args() - bench(args.data_path, args.query_path) diff --git a/build.rs b/build.rs deleted file mode 100644 index 4878d8b0e..000000000 --- a/build.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -fn main() { - pyo3_build_config::add_extension_module_link_args(); -} diff --git a/ci/scripts/python_lint.sh b/ci/scripts/python_lint.sh deleted file mode 100755 index 3f7310ba7..000000000 --- a/ci/scripts/python_lint.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -ruff format datafusion -ruff check datafusion \ No newline at end of file diff --git a/ci/scripts/rust_clippy.sh b/ci/scripts/rust_clippy.sh deleted file mode 100755 index 911330c8b..000000000 --- a/ci/scripts/rust_clippy.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -cargo clippy --all-targets --workspace --features default -- -D warnings diff --git a/ci/scripts/rust_fmt.sh b/ci/scripts/rust_fmt.sh deleted file mode 100755 index 05cb6b208..000000000 --- a/ci/scripts/rust_fmt.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -cargo +nightly fmt --all -- --check diff --git a/ci/scripts/rust_toml_fmt.sh b/ci/scripts/rust_toml_fmt.sh deleted file mode 100755 index e297ef001..000000000 --- a/ci/scripts/rust_toml_fmt.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex -find . 
-mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -p {} \; diff --git a/conftest.py b/conftest.py deleted file mode 100644 index 1c89f92bc..000000000 --- a/conftest.py +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Pytest configuration for doctest namespace injection.""" - -import datafusion as dfn -import numpy as np -import pytest - - -@pytest.fixture(autouse=True) -def _doctest_namespace(doctest_namespace: dict) -> None: - """Add common imports to the doctest namespace.""" - doctest_namespace["dfn"] = dfn - doctest_namespace["np"] = np diff --git a/contributor-guide/ffi.html b/contributor-guide/ffi.html new file mode 100644 index 000000000..e48533bb9 --- /dev/null +++ b/contributor-guide/ffi.html @@ -0,0 +1,747 @@ + + + + + + + + Python Extensions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

Python Extensions

+

The DataFusion in Python project is designed to allow users to extend its functionality in a few core +areas. Ideally many users would like to package their extensions as a Python package and easily +integrate that package with this project. This page serves to describe some of the challenges we face +when doing these integrations and the approach our project uses.

+
+

The Primary Issue

+

Suppose you wish to use DataFusion and you have a custom data source that can produce tables that +can then be queried against, similar to how you can register a CSV or +Parquet file. In DataFusion terminology, you likely want to implement a +Custom Table Provider. In an effort to make your data source +as performant as possible and to utilize the features of DataFusion, you may decide to write +your source in Rust and then expose it through PyO3 as a Python library.

+

At first glance, it may appear the best way to do this is to add the datafusion-python +crate as a dependency, provide a PyTable, and then to register it with the +SessionContext. Unfortunately, this will not work.

+

When you produce your code as a Python library and it needs to interact with the DataFusion +library, at the lowest level they communicate through an Application Binary Interface (ABI). +The acronym sounds similar to API (Application Programming Interface), but it is distinctly +different.

+

The ABI sets the standard for how these libraries can share data and functions between each +other. One of the key differences between Rust and other programming languages is that Rust +does not have a stable ABI. What this means in practice is that if you compile a Rust library +with one version of the rustc compiler and I compile another library to interface with it +but I use a different version of the compiler, there is no guarantee the interface will be +the same.

+

In practice, this means that a Python library built with datafusion-python as a Rust +dependency will generally not be compatible with the DataFusion Python package, even +if they reference the same version of datafusion-python. If you attempt to do this, it may +work on your local computer if you have built both packages with the same optimizations. +This can sometimes lead to a false expectation that the code will work, but it frequently +breaks the moment you try to use your package against the released packages.

+

You can find more information about the Rust ABI in their +online documentation.

+
+
+

The FFI Approach

+

Rust supports interacting with other programming languages through its Foreign Function +Interface (FFI). The advantage of using the FFI is that it enables you to write data structures +and functions that have a stable ABI. This allows you to use Rust code with C, Python, and +other languages. In fact, the PyO3 library uses the FFI to share data +and functions between Python and Rust.

+

The approach we are taking in the DataFusion in Python project is to incrementally expose +more portions of the DataFusion project via FFI interfaces. This allows users to write Rust +code that does not require the datafusion-python crate as a dependency, expose their +code in Python via PyO3, and have it interact with the DataFusion Python package.

+

Early adopters of this approach include delta-rs, +which has adapted its Table Provider for use in `datafusion-python` with only a few lines +of code. Also, the DataFusion Python project uses the existing definitions from the +Apache Arrow C Stream Interface +to support importing and exporting tables. Any Python package that supports reading +the Arrow C Stream interface can work with DataFusion Python out of the box! You can read +more about working with Arrow sources in the Data Sources +page.

+

To learn more about the Foreign Function Interface in Rust, the +Rustonomicon is a good resource.

+
+
+

Inspiration from Arrow

+

DataFusion is built upon Apache Arrow. The canonical Python +Arrow implementation, pyarrow, provides +an excellent way to share Arrow data between Python projects without performing any copy +operations on the data. They do this by using a well-defined set of interfaces. You can +find the details about their stream interface +here. The +Rust Arrow Implementation also supports these +C style definitions via the Foreign Function Interface.

+

In addition to using these interfaces to transfer Arrow data between libraries, pyarrow +goes one step further to make sharing the interfaces easier in Python. They do this +by exposing PyCapsules that contain the expected functionality.

+

You can learn more about PyCapsules from the official +Python online documentation. PyCapsules +have excellent support in PyO3 already. The +PyO3 online documentation is a good source +for more details on using PyCapsules in Rust.

+

Two lessons we leverage from the Arrow project in DataFusion Python are:

+
    +
  • We reuse the existing Arrow FFI functionality wherever possible.

  • +
  • We expose PyCapsules that contain an FFI-stable struct.

  • +
+
+
+

Implementation Details

+

The bulk of the code necessary to perform our FFI operations is in the upstream +DataFusion core repository. You can review the code and +documentation in the datafusion-ffi crate.

+

Our FFI implementation is narrowly focused on sharing data and functions with Rust backed +libraries. This allows us to use the abi_stable crate. +This is an excellent crate that allows for easy conversion between Rust native types +and FFI-safe alternatives. For example, if you needed to pass a Vec<String> via FFI, +you can simply convert it to an RVec<RString> in an intuitive manner. It also supports +features like RResult and ROption that do not have an obvious translation to a +C equivalent.

+

The datafusion-ffi crate has been designed to make it easy to convert from DataFusion +traits into their FFI counterparts. For example, if you have defined a custom +TableProvider +and you want to create a sharable FFI counterpart, you could write:

+
let my_provider = MyTableProvider::default();
+let ffi_provider = FFI_TableProvider::new(Arc::new(my_provider), false, None);
+
+
+
+
+

PyO3 class mutability guidelines

+

PyO3 bindings should present immutable wrappers whenever a struct stores shared or +interior-mutable state. In practice this means that any #[pyclass] containing an +Arc<RwLock<_>> or similar synchronized primitive must opt into #[pyclass(frozen)] +unless there is a compelling reason not to.

+

The datafusion configuration helpers illustrate the preferred pattern. The +PyConfig class in src/config.rs stores an Arc<RwLock<ConfigOptions>> and is +explicitly frozen so callers interact with configuration state through provided methods +instead of mutating the container directly:

+
#[pyclass(name = "Config", module = "datafusion", subclass, frozen)]
+#[derive(Clone)]
+pub(crate) struct PyConfig {
+    config: Arc<RwLock<ConfigOptions>>,
+}
+
+
+

The same approach applies to execution contexts. PySessionContext in +src/context.rs stays frozen even though it shares mutable state internally via +SessionContext. This ensures PyO3 tracks borrows correctly while Python-facing APIs +clone the inner SessionContext or return new wrappers instead of mutating the +existing instance in place:

+
#[pyclass(frozen, name = "SessionContext", module = "datafusion", subclass)]
+#[derive(Clone)]
+pub struct PySessionContext {
+    pub ctx: SessionContext,
+}
+
+
+

Occasionally a type must remain mutable—for example when PyO3 attribute setters need to +update fields directly. In these rare cases add an inline justification so reviewers and +future contributors understand why frozen is unsafe to enable. DataTypeMap in +src/common/data_type.rs includes such a comment because PyO3 still needs to track +field updates:

+
// TODO: This looks like this needs pyo3 tracking so leaving unfrozen for now
+#[derive(Debug, Clone)]
+#[pyclass(name = "DataTypeMap", module = "datafusion.common", subclass)]
+pub struct DataTypeMap {
+    #[pyo3(get, set)]
+    pub arrow_type: PyDataType,
+    #[pyo3(get, set)]
+    pub python_type: PythonType,
+    #[pyo3(get, set)]
+    pub sql_type: SqlType,
+}
+
+
+

When reviewers encounter a mutable #[pyclass] without a comment, they should request +an explanation or ask that frozen be added. Keeping these wrappers frozen by default +helps avoid subtle bugs stemming from PyO3’s interior mutability tracking.

+

If you were interfacing with a library that provided the above FFI_TableProvider and +you needed to turn it back into a TableProvider, you can turn it into a +ForeignTableProvider which implements the TableProvider trait.

+
let foreign_provider: ForeignTableProvider = ffi_provider.into();
+
+
+

If you review the code in datafusion-ffi you will find that each of the traits we share +across the boundary has two portions, one with an FFI_ prefix and one with a Foreign +prefix. This is used to distinguish which side of the FFI boundary that struct is +designed to be used on. The structures with the FFI_ prefix are to be used by the +provider of the structure. In the example we’re showing, this means the code that has +written the underlying TableProvider implementation to access your custom data source. +The structures with the Foreign prefix are to be used by the receiver. In this case, +it is the datafusion-python library.

+

In order to share these FFI structures, we need to wrap them in some kind of Python object +that can be used to interface from one package to another. As described in the above +section on our inspiration from Arrow, we use PyCapsule. We can create a PyCapsule +for our provider thusly:

+
let name = CString::new("datafusion_table_provider")?;
+let my_capsule = PyCapsule::new_bound(py, provider, Some(name))?;
+
+
+

On the receiving side, turn this PyCapsule object into the FFI_TableProvider, which +can then be turned into a ForeignTableProvider. The associated code is:

+
let capsule = capsule.downcast::<PyCapsule>()?;
+let provider = unsafe { capsule.reference::<FFI_TableProvider>() };
+
+
+

By convention the datafusion-python library expects a Python object that has a +TableProvider PyCapsule to have this capsule accessible by calling a function named +__datafusion_table_provider__. You can see a complete working example of how to +share a TableProvider from one python library to DataFusion Python in the +repository examples folder.

+

This section has been written using TableProvider as an example. It is the first +extension that has been written using this approach and the most thoroughly implemented. +As we continue to expose more of the DataFusion features, we intend to follow this same +design pattern.

+
+
+

Alternative Approach

+

Suppose you needed to expose some other features of DataFusion and you could not wait +for the upstream repository to implement the FFI approach we describe. In this case +you might decide to take a dependency on the datafusion-python crate instead.

+

As we discussed, this is not guaranteed to work across different compiler versions and +optimization levels. If you wish to go down this route, there are two approaches we +have identified you can use.

+
    +
  1. Re-export all of datafusion-python yourself with your extensions built in.

  2. +
  3. Carefully synchronize your software releases with the datafusion-python CI build +system so that your libraries use the exact same compiler, features, and +optimization level.

  4. +
+

We currently do not recommend either of these approaches as they are difficult to +maintain over a long period. Additionally, they require a tight version coupling +between libraries.

+
+
+

Status of Work

+

At the time of this writing, the FFI features are under active development. To see +the latest status, we recommend reviewing the code in the datafusion-ffi crate.

+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/contributor-guide/introduction.html b/contributor-guide/introduction.html new file mode 100644 index 000000000..d9cc00b36 --- /dev/null +++ b/contributor-guide/introduction.html @@ -0,0 +1,633 @@ + + + + + + + + Introduction — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

Introduction

+

We welcome and encourage contributions of all kinds, such as:

+
    +
  1. Tickets with issue reports or feature requests

  2. +
  3. Documentation improvements

  4. +
  5. Code, both PR and (especially) PR Review.

  6. +
+

In addition to submitting new PRs, we have a healthy tradition of community members reviewing each other’s PRs. +Doing so is a great way to help the community as well as get more familiar with Rust and the relevant codebases.

+

Before opening a pull request that touches PyO3 bindings, please review the +PyO3 class mutability guidelines so you can flag missing +#[pyclass(frozen)] annotations during development and review.

+
+

How to develop

+

This assumes that you have rust and cargo installed. We use the workflow recommended by +pyo3 and maturin. We recommend using +uv for python package management.

+

By default uv will attempt to build the datafusion python package. For our development we prefer to build manually. This means +that when creating your virtual environment using uv sync you need to pass in the additional --no-install-package datafusion +and for uv run commands the additional parameter --no-project.

+

Bootstrap:

+
# fetch this repo
+git clone git@github.com:apache/datafusion-python.git
+# create the virtual environment
+uv sync --dev --no-install-package datafusion
+# activate the environment
+source .venv/bin/activate
+
+
+

The tests rely on test data in git submodules.

+
git submodule init
+git submodule update
+
+
+

Whenever rust code changes (your changes or via git pull):

+
# make sure you activate the venv using "source .venv/bin/activate" first
+maturin develop -uv
+python -m pytest
+
+
+
+
+

Running & Installing pre-commit hooks

+

arrow-datafusion-python takes advantage of pre-commit to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise.

+

Our pre-commit hooks can be installed by running pre-commit install, which will install the hooks in your ARROW_DATAFUSION_PYTHON_ROOT/.git/hooks directory and run them each time you perform a commit, failing to complete the commit if an offending lint is found, allowing you to make changes locally before pushing.

+

The pre-commit hooks can also be run ad hoc without installing them by simply running pre-commit run --all-files

+
+
+

Guidelines for Separating Python and Rust Code

+

Version 40 of datafusion-python introduced python wrappers around the pyo3 generated code to vastly improve the user experience. (See the blog post and pull request for more details.)

+

Mostly, the python code is limited to pure wrappers with type hints and good docstrings, but there are a few cases where the code does more:

+
    +
  1. Trivial aliases like array_append() and list_append().

  2. +
  3. Simple type conversion, like from a path to a string of the path or from number to lit(number).

  4. +
  5. The additional code makes an API much more pythonic, like we do for named_struct() (see source code).

  6. +
+
+
+

Update Dependencies

+

To change test dependencies, edit pyproject.toml and then run the update command shown below.

+

To update dependencies, run

+
uv sync --dev --no-install-package datafusion
+
+
+
+
+

Improving Build Speed

+

The pyo3 dependency of this project contains a build.rs file which +can cause it to rebuild frequently. You can prevent this from happening by defining a PYO3_CONFIG_FILE +environment variable that points to a file with your build configuration. Whenever your build configuration +changes, such as during some major version updates, you will need to regenerate this file. This variable +should point to a fully resolved path on your build machine.

+

To generate this file, use the following command:

+
PYO3_PRINT_CONFIG=1 cargo build
+
+
+

This will generate some output that looks like the following. You will want to copy these contents into +a file. If you place this file in your project directory with filename .pyo3_build_config it will +be ignored by git.

+
implementation=CPython
+version=3.9
+shared=true
+abi3=true
+lib_name=python3.12
+lib_dir=/opt/homebrew/opt/python@3.12/Frameworks/Python.framework/Versions/3.12/lib
+executable=/Users/myusername/src/datafusion-python/.venv/bin/python
+pointer_width=64
+build_flags=
+suppress_build_script_link_lines=false
+
+
+

Add the environment variable to your system.

+
export PYO3_CONFIG_FILE="/Users/myusername/src/datafusion-python/.pyo3_build_config"
+
+
+

If you are on a Mac and you use VS Code for your IDE, you will want to add these variables +to your settings. You can find the appropriate rust flags by looking in the +.cargo/config.toml file.

+
"rust-analyzer.cargo.extraEnv": {
+    "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup",
+    "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config"
+},
+"rust-analyzer.runnables.extraEnv": {
+    "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup",
+    "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config"
+}
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/dev/build-set-env.sh b/dev/build-set-env.sh deleted file mode 100755 index 1d984710c..000000000 --- a/dev/build-set-env.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -export PY_DATAFUSION_VERSION=$(awk -F'[ ="]+' '$1 == "version" { print $2 }' Cargo.toml) diff --git a/dev/changelog/43.0.0.md b/dev/changelog/43.0.0.md deleted file mode 100644 index bbb766910..000000000 --- a/dev/changelog/43.0.0.md +++ /dev/null @@ -1,73 +0,0 @@ - - -# Apache DataFusion Python 43.0.0 Changelog - -This release consists of 26 commits from 7 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: expose `drop` method [#913](https://github.com/apache/datafusion-python/pull/913) (ion-elgreco) -- feat: expose `join_on` [#914](https://github.com/apache/datafusion-python/pull/914) (ion-elgreco) -- feat: add fill_null/nan expressions [#919](https://github.com/apache/datafusion-python/pull/919) (ion-elgreco) -- feat: add `with_columns` [#909](https://github.com/apache/datafusion-python/pull/909) (ion-elgreco) -- feat: add `cast` to DataFrame [#916](https://github.com/apache/datafusion-python/pull/916) (ion-elgreco) -- feat: add `head`, `tail` methods [#915](https://github.com/apache/datafusion-python/pull/915) (ion-elgreco) - -**Fixed bugs:** - -- fix: remove use of deprecated `make_scalar_function` [#906](https://github.com/apache/datafusion-python/pull/906) (Michael-J-Ward) -- fix: udwf example [#948](https://github.com/apache/datafusion-python/pull/948) (mesejo) - -**Other:** - -- Ts/minor updates release process [#903](https://github.com/apache/datafusion-python/pull/903) (timsaucer) -- build(deps): bump pyo3 from 0.22.3 to 0.22.4 [#910](https://github.com/apache/datafusion-python/pull/910) (dependabot[bot]) -- refactor: `from_arrow` use protocol typehints [#917](https://github.com/apache/datafusion-python/pull/917) (ion-elgreco) -- Change requires-python version in pyproject.toml [#924](https://github.com/apache/datafusion-python/pull/924) (kosiew) -- chore: deprecate `select_columns` [#911](https://github.com/apache/datafusion-python/pull/911) (ion-elgreco) -- build(deps): bump uuid from 1.10.0 to 1.11.0 [#927](https://github.com/apache/datafusion-python/pull/927) (dependabot[bot]) -- Add array_empty scalar function [#931](https://github.com/apache/datafusion-python/pull/931) (kosiew) -- add `cardinality` function to calculate total distinct elements in an array [#937](https://github.com/apache/datafusion-python/pull/937) (kosiew) -- Add empty scalar function (alias of array_empty), fix a small typo 
[#938](https://github.com/apache/datafusion-python/pull/938) (kosiew) -- README How to develop section now also works on Apple M1 [#940](https://github.com/apache/datafusion-python/pull/940) (drauschenbach) -- refactor: dataframe `join` params [#912](https://github.com/apache/datafusion-python/pull/912) (ion-elgreco) -- Upgrade to Datafusion 43 [#905](https://github.com/apache/datafusion-python/pull/905) (Michael-J-Ward) -- build(deps): bump tokio from 1.40.0 to 1.41.1 [#946](https://github.com/apache/datafusion-python/pull/946) (dependabot[bot]) -- Add list_cat, list_concat, list_repeat [#942](https://github.com/apache/datafusion-python/pull/942) (kosiew) -- Add foreign table providers [#921](https://github.com/apache/datafusion-python/pull/921) (timsaucer) -- Add make_list and tests for make_list, make_array [#949](https://github.com/apache/datafusion-python/pull/949) (kosiew) -- Documentation updates: simplify examples and add section on data sources [#955](https://github.com/apache/datafusion-python/pull/955) (timsaucer) -- Add datafusion.extract [#959](https://github.com/apache/datafusion-python/pull/959) (kosiew) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 9 Ion Koutsouris - 7 kosiew - 3 Tim Saucer - 3 dependabot[bot] - 2 Michael J Ward - 1 Daniel Mesejo - 1 David Rauschenbach -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/changelog/44.0.0.md b/dev/changelog/44.0.0.md deleted file mode 100644 index c5ed4bdb0..000000000 --- a/dev/changelog/44.0.0.md +++ /dev/null @@ -1,58 +0,0 @@ - - -# Apache DataFusion Python 44.0.0 Changelog - -This release consists of 12 commits from 5 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: support enable_url_table config [#980](https://github.com/apache/datafusion-python/pull/980) (chenkovsky) -- feat: remove DataFusion pyarrow feat [#1000](https://github.com/apache/datafusion-python/pull/1000) (timsaucer) - -**Fixed bugs:** - -- fix: correct LZ0 to LZO in compression options [#995](https://github.com/apache/datafusion-python/pull/995) (kosiew) - -**Other:** - -- Add arrow cast [#962](https://github.com/apache/datafusion-python/pull/962) (kosiew) -- Fix small issues in pyproject.toml [#976](https://github.com/apache/datafusion-python/pull/976) (kylebarron) -- chore: set validation and type hint for ffi tableprovider [#983](https://github.com/apache/datafusion-python/pull/983) (ion-elgreco) -- Support async iteration of RecordBatchStream [#975](https://github.com/apache/datafusion-python/pull/975) (kylebarron) -- Chore/upgrade datafusion 44 [#973](https://github.com/apache/datafusion-python/pull/973) (timsaucer) -- Default to ZSTD compression when writing Parquet [#981](https://github.com/apache/datafusion-python/pull/981) (kosiew) -- Feat/use uv python management [#994](https://github.com/apache/datafusion-python/pull/994) (timsaucer) -- minor: Update dependencies prior to release [#999](https://github.com/apache/datafusion-python/pull/999) (timsaucer) -- Apply import ordering in ruff check [#1001](https://github.com/apache/datafusion-python/pull/1001) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 5 Tim Saucer - 3 kosiew - 2 Kyle Barron - 1 Chongchen Chen - 1 Ion Koutsouris -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- diff --git a/dev/changelog/45.0.0.md b/dev/changelog/45.0.0.md deleted file mode 100644 index 93659b171..000000000 --- a/dev/changelog/45.0.0.md +++ /dev/null @@ -1,42 +0,0 @@ - - -# Apache DataFusion Python 45.0.0 Changelog - -This release consists of 2 commits from 2 contributors. See credits at the end of this changelog for more information. - -**Fixed bugs:** - -- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) - -**Other:** - -- Chore/upgrade datafusion 45 [#1010](https://github.com/apache/datafusion-python/pull/1010) (kevinjqliu) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 1 Kevin Liu - 1 Tim Saucer -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/46.0.0.md b/dev/changelog/46.0.0.md deleted file mode 100644 index 3e5768099..000000000 --- a/dev/changelog/46.0.0.md +++ /dev/null @@ -1,73 +0,0 @@ - - -# Apache DataFusion Python 46.0.0 Changelog - -This release consists of 21 commits from 11 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: reads using global ctx [#982](https://github.com/apache/datafusion-python/pull/982) (ion-elgreco) -- feat: Implementation of udf and udaf decorator [#1040](https://github.com/apache/datafusion-python/pull/1040) (CrystalZhou0529) -- feat: expose regex_count function [#1066](https://github.com/apache/datafusion-python/pull/1066) (nirnayroy) -- feat: Update DataFusion dependency to 46 [#1079](https://github.com/apache/datafusion-python/pull/1079) (timsaucer) - -**Fixed bugs:** - -- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) -- fix: type checking [#993](https://github.com/apache/datafusion-python/pull/993) (chenkovsky) - -**Other:** - -- [infra] Fail Clippy on rust build warnings [#1029](https://github.com/apache/datafusion-python/pull/1029) (kevinjqliu) -- Add user documentation for the FFI approach [#1031](https://github.com/apache/datafusion-python/pull/1031) (timsaucer) -- build(deps): bump arrow from 54.1.0 to 54.2.0 [#1035](https://github.com/apache/datafusion-python/pull/1035) (dependabot[bot]) -- Chore: Release datafusion-python 45 [#1024](https://github.com/apache/datafusion-python/pull/1024) (timsaucer) -- Enable Dataframe to be converted into views which can be used in register_table [#1016](https://github.com/apache/datafusion-python/pull/1016) (kosiew) -- Add ruff check for missing futures import [#1052](https://github.com/apache/datafusion-python/pull/1052) (timsaucer) -- Enable take comments to assign issues to users [#1058](https://github.com/apache/datafusion-python/pull/1058) (timsaucer) -- Update python min version to 3.9 [#1043](https://github.com/apache/datafusion-python/pull/1043) (kevinjqliu) -- feat/improve ruff test coverage [#1055](https://github.com/apache/datafusion-python/pull/1055) (timsaucer) -- feat/making global context accessible for users [#1060](https://github.com/apache/datafusion-python/pull/1060) (jsai28) -- Renaming Internal 
Structs [#1059](https://github.com/apache/datafusion-python/pull/1059) (Spaarsh) -- test: add pytest asyncio tests [#1063](https://github.com/apache/datafusion-python/pull/1063) (jsai28) -- Add decorator for udwf [#1061](https://github.com/apache/datafusion-python/pull/1061) (kosiew) -- Add additional ruff suggestions [#1062](https://github.com/apache/datafusion-python/pull/1062) (Spaarsh) -- Improve collection during repr and repr_html [#1036](https://github.com/apache/datafusion-python/pull/1036) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 Tim Saucer - 2 Kevin Liu - 2 Spaarsh - 2 jsai28 - 2 kosiew - 1 Chen Chongchen - 1 Chongchen Chen - 1 Crystal Zhou - 1 Ion Koutsouris - 1 Nirnay Roy - 1 dependabot[bot] -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/47.0.0.md b/dev/changelog/47.0.0.md deleted file mode 100644 index a7ed90313..000000000 --- a/dev/changelog/47.0.0.md +++ /dev/null @@ -1,64 +0,0 @@ - - -# Apache DataFusion Python 47.0.0 Changelog - -This release consists of 23 commits from 5 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: support unparser [#1088](https://github.com/apache/datafusion-python/pull/1088) (chenkovsky) -- feat: update datafusion dependency 47 [#1107](https://github.com/apache/datafusion-python/pull/1107) (timsaucer) -- feat: alias with metadata [#1111](https://github.com/apache/datafusion-python/pull/1111) (chenkovsky) -- feat: add missing PyLogicalPlan to_variant [#1085](https://github.com/apache/datafusion-python/pull/1085) (chenkovsky) -- feat: add user defined table function support [#1113](https://github.com/apache/datafusion-python/pull/1113) (timsaucer) - -**Fixed bugs:** - -- fix: recursive import [#1117](https://github.com/apache/datafusion-python/pull/1117) (chenkovsky) - -**Other:** - -- Update changelog and version number [#1089](https://github.com/apache/datafusion-python/pull/1089) (timsaucer) -- Documentation updates: mention correct dataset on basics page [#1081](https://github.com/apache/datafusion-python/pull/1081) (floscha) -- Add Configurable HTML Table Formatter for DataFusion DataFrames in Python [#1100](https://github.com/apache/datafusion-python/pull/1100) (kosiew) -- Add DataFrame usage guide with HTML rendering customization options [#1108](https://github.com/apache/datafusion-python/pull/1108) (kosiew) -- 1075/enhancement/Make col class with __getattr__ [#1076](https://github.com/apache/datafusion-python/pull/1076) (deanm0000) -- 1064/enhancement/add functions to Expr class [#1074](https://github.com/apache/datafusion-python/pull/1074) (deanm0000) -- ci: require approving review [#1122](https://github.com/apache/datafusion-python/pull/1122) (timsaucer) -- Partial fix for 1078: Enhance DataFrame Formatter Configuration with Memory and Display Controls [#1119](https://github.com/apache/datafusion-python/pull/1119) (kosiew) -- Add fill_null method to DataFrame API for handling missing values [#1019](https://github.com/apache/datafusion-python/pull/1019) (kosiew) -- minor: reduce error size 
[#1126](https://github.com/apache/datafusion-python/pull/1126) (timsaucer) -- Move the udf module to user_defined [#1112](https://github.com/apache/datafusion-python/pull/1112) (timsaucer) -- add unit tests for expression functions [#1121](https://github.com/apache/datafusion-python/pull/1121) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 12 Tim Saucer - 4 Chen Chongchen - 4 kosiew - 2 deanm0000 - 1 Florian Schäfer -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/48.0.0.md b/dev/changelog/48.0.0.md deleted file mode 100644 index 80bc61aca..000000000 --- a/dev/changelog/48.0.0.md +++ /dev/null @@ -1,59 +0,0 @@ - - -# Apache DataFusion Python 48.0.0 Changelog - -This release consists of 15 commits from 6 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: upgrade df48 dependency [#1143](https://github.com/apache/datafusion-python/pull/1143) (timsaucer) -- feat: Support Parquet writer options [#1123](https://github.com/apache/datafusion-python/pull/1123) (nuno-faria) -- feat: dataframe string formatter [#1170](https://github.com/apache/datafusion-python/pull/1170) (timsaucer) -- feat: collect once during display() in jupyter notebooks [#1167](https://github.com/apache/datafusion-python/pull/1167) (timsaucer) -- feat: python based catalog and schema provider [#1156](https://github.com/apache/datafusion-python/pull/1156) (timsaucer) -- feat: add FFI support for user defined functions [#1145](https://github.com/apache/datafusion-python/pull/1145) (timsaucer) - -**Other:** - -- Release DataFusion 47.0.0 [#1130](https://github.com/apache/datafusion-python/pull/1130) (timsaucer) -- Add a documentation build step in CI [#1139](https://github.com/apache/datafusion-python/pull/1139) (crystalxyz) -- Add DataFrame API Documentation for DataFusion Python [#1132](https://github.com/apache/datafusion-python/pull/1132) (kosiew) -- Add Interruptible Query Execution in Jupyter via KeyboardInterrupt Support [#1141](https://github.com/apache/datafusion-python/pull/1141) (kosiew) -- Support types other than String and Int for partition columns [#1154](https://github.com/apache/datafusion-python/pull/1154) (miclegr) -- Fix signature of `__arrow_c_stream__` [#1168](https://github.com/apache/datafusion-python/pull/1168) (kylebarron) -- Consolidate DataFrame Docs: Merge HTML Rendering Section as Subpage [#1161](https://github.com/apache/datafusion-python/pull/1161) (kosiew) -- Add compression_level support to ParquetWriterOptions and enhance write_parquet to accept full options object [#1169](https://github.com/apache/datafusion-python/pull/1169) (kosiew) -- Simplify HTML Formatter Style Handling Using Script Injection [#1177](https://github.com/apache/datafusion-python/pull/1177) (kosiew) - -## 
Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 6 Tim Saucer - 5 kosiew - 1 Crystal Zhou - 1 Kyle Barron - 1 Michele Gregori - 1 Nuno Faria -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/49.0.0.md b/dev/changelog/49.0.0.md deleted file mode 100644 index 008bd43bc..000000000 --- a/dev/changelog/49.0.0.md +++ /dev/null @@ -1,61 +0,0 @@ - - -# Apache DataFusion Python 49.0.0 Changelog - -This release consists of 16 commits from 7 contributors. See credits at the end of this changelog for more information. - -**Fixed bugs:** - -- fix(build): Include build.rs in published crates [#1199](https://github.com/apache/datafusion-python/pull/1199) (colinmarc) - -**Other:** - -- 48.0.0 Release [#1175](https://github.com/apache/datafusion-python/pull/1175) (timsaucer) -- Update CI rules [#1188](https://github.com/apache/datafusion-python/pull/1188) (timsaucer) -- Fix Python UDAF Accumulator Interface example to Properly Handle State and Updates with List[Array] Types [#1192](https://github.com/apache/datafusion-python/pull/1192) (kosiew) -- chore: Upgrade datafusion to version 49 [#1200](https://github.com/apache/datafusion-python/pull/1200) (nuno-faria) -- Update how to dev instructions [#1179](https://github.com/apache/datafusion-python/pull/1179) (ntjohnson1) -- build(deps): bump object_store from 0.12.2 to 0.12.3 [#1189](https://github.com/apache/datafusion-python/pull/1189) (dependabot[bot]) -- build(deps): bump uuid from 1.17.0 to 1.18.0 [#1202](https://github.com/apache/datafusion-python/pull/1202) (dependabot[bot]) -- build(deps): bump async-trait from 0.1.88 to 0.1.89 [#1203](https://github.com/apache/datafusion-python/pull/1203) (dependabot[bot]) -- build(deps): bump slab from 0.4.10 to 0.4.11 [#1205](https://github.com/apache/datafusion-python/pull/1205) 
(dependabot[bot]) -- Improved window and aggregate function signature [#1187](https://github.com/apache/datafusion-python/pull/1187) (timsaucer) -- Optional improvements in verification instructions [#1183](https://github.com/apache/datafusion-python/pull/1183) (paleolimbot) -- Improve `show()` output for empty DataFrames [#1208](https://github.com/apache/datafusion-python/pull/1208) (kosiew) -- build(deps): bump actions/download-artifact from 4 to 5 [#1201](https://github.com/apache/datafusion-python/pull/1201) (dependabot[bot]) -- build(deps): bump url from 2.5.4 to 2.5.7 [#1210](https://github.com/apache/datafusion-python/pull/1210) (dependabot[bot]) -- build(deps): bump actions/checkout from 4 to 5 [#1204](https://github.com/apache/datafusion-python/pull/1204) (dependabot[bot]) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 dependabot[bot] - 3 Tim Saucer - 2 kosiew - 1 Colin Marc - 1 Dewey Dunnington - 1 Nick - 1 Nuno Faria -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/50.0.0.md b/dev/changelog/50.0.0.md deleted file mode 100644 index c3f09d180..000000000 --- a/dev/changelog/50.0.0.md +++ /dev/null @@ -1,60 +0,0 @@ - - -# Apache DataFusion Python 50.0.0 Changelog - -This release consists of 12 commits from 7 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: allow passing a slice to and expression with the [] indexing [#1215](https://github.com/apache/datafusion-python/pull/1215) (timsaucer) - -**Documentation updates:** - -- docs: fix CaseBuilder documentation example [#1225](https://github.com/apache/datafusion-python/pull/1225) (IndexSeek) -- docs: update link to user example for custom table provider [#1224](https://github.com/apache/datafusion-python/pull/1224) (IndexSeek) -- docs: add apache iceberg as datafusion data source [#1240](https://github.com/apache/datafusion-python/pull/1240) (kevinjqliu) - -**Other:** - -- 49.0.0 release [#1211](https://github.com/apache/datafusion-python/pull/1211) (timsaucer) -- Update development guide in README.md [#1213](https://github.com/apache/datafusion-python/pull/1213) (YKoustubhRao) -- Add benchmark script and documentation for maximizing CPU usage in DataFusion Python [#1216](https://github.com/apache/datafusion-python/pull/1216) (kosiew) -- Fixing a few Typos [#1220](https://github.com/apache/datafusion-python/pull/1220) (ntjohnson1) -- Set fail on warning for documentation generation [#1218](https://github.com/apache/datafusion-python/pull/1218) (timsaucer) -- chore: remove redundant error transformation [#1232](https://github.com/apache/datafusion-python/pull/1232) (mesejo) -- Support string column identifiers for sort/aggregate/window and stricter Expr validation [#1221](https://github.com/apache/datafusion-python/pull/1221) (kosiew) -- Prepare for DF50 [#1231](https://github.com/apache/datafusion-python/pull/1231) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 4 Tim Saucer - 2 Tyler White - 2 kosiew - 1 Daniel Mesejo - 1 Kevin Liu - 1 Koustubh Rao - 1 Nick -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- diff --git a/dev/changelog/50.1.0.md b/dev/changelog/50.1.0.md deleted file mode 100644 index 3b9ff84ff..000000000 --- a/dev/changelog/50.1.0.md +++ /dev/null @@ -1,57 +0,0 @@ - - -# Apache DataFusion Python 50.1.0 Changelog - -This release consists of 11 commits from 7 contributors. See credits at the end of this changelog for more information. - -**Breaking changes:** - -- Unify Table representations [#1256](https://github.com/apache/datafusion-python/pull/1256) (timsaucer) - -**Implemented enhancements:** - -- feat: expose DataFrame.write_table [#1264](https://github.com/apache/datafusion-python/pull/1264) (timsaucer) -- feat: expose` DataFrame.parse_sql_expr` [#1274](https://github.com/apache/datafusion-python/pull/1274) (milenkovicm) - -**Other:** - -- Update version number, add changelog [#1249](https://github.com/apache/datafusion-python/pull/1249) (timsaucer) -- Fix drop() method to handle quoted column names consistently [#1242](https://github.com/apache/datafusion-python/pull/1242) (H0TB0X420) -- Make Session Context `pyclass` frozen so interior mutability is only managed by rust [#1248](https://github.com/apache/datafusion-python/pull/1248) (ntjohnson1) -- macos-13 is deprecated [#1259](https://github.com/apache/datafusion-python/pull/1259) (kevinjqliu) -- Freeze PyO3 wrappers & introduce interior mutability to avoid PyO3 borrow errors [#1253](https://github.com/apache/datafusion-python/pull/1253) (kosiew) -- chore: update dependencies [#1269](https://github.com/apache/datafusion-python/pull/1269) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 4 Tim Saucer - 2 Siew Kam Onn - 1 H0TB0X420 - 1 Kevin Liu - 1 Marko Milenković - 1 Nick - 1 kosiew -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- diff --git a/dev/changelog/51.0.0.md b/dev/changelog/51.0.0.md deleted file mode 100644 index cc157eb0d..000000000 --- a/dev/changelog/51.0.0.md +++ /dev/null @@ -1,74 +0,0 @@ - - -# Apache DataFusion Python 51.0.0 Changelog - -This release consists of 23 commits from 7 contributors. See credits at the end of this changelog for more information. - -**Breaking changes:** - -- feat: reduce duplicate fields on join [#1184](https://github.com/apache/datafusion-python/pull/1184) (timsaucer) - -**Implemented enhancements:** - -- feat: expose `select_exprs` method on DataFrame [#1271](https://github.com/apache/datafusion-python/pull/1271) (milenkovicm) -- feat: allow DataFrame.filter to accept SQL strings [#1276](https://github.com/apache/datafusion-python/pull/1276) (K-dash) -- feat: add temporary view option for into_view [#1267](https://github.com/apache/datafusion-python/pull/1267) (timsaucer) -- feat: support session token parameter for AmazonS3 [#1275](https://github.com/apache/datafusion-python/pull/1275) (GCHQDeveloper028) -- feat: `with_column` supports SQL expression [#1284](https://github.com/apache/datafusion-python/pull/1284) (milenkovicm) -- feat: Add SQL expression for `repartition_by_hash` [#1285](https://github.com/apache/datafusion-python/pull/1285) (milenkovicm) -- feat: Add SQL expression support for `with_columns` [#1286](https://github.com/apache/datafusion-python/pull/1286) (milenkovicm) - -**Fixed bugs:** - -- fix: use coalesce instead of drop_duplicate_keys for join [#1318](https://github.com/apache/datafusion-python/pull/1318) (mesejo) -- fix: Inconsistent schemas when converting to pyarrow [#1315](https://github.com/apache/datafusion-python/pull/1315) (nuno-faria) - -**Other:** - -- Release 50.1 [#1281](https://github.com/apache/datafusion-python/pull/1281) (timsaucer) -- Update python minimum version to 3.10 [#1296](https://github.com/apache/datafusion-python/pull/1296) (timsaucer) -- chore: update datafusion minor version 
[#1297](https://github.com/apache/datafusion-python/pull/1297) (timsaucer) -- Enable remaining pylints [#1298](https://github.com/apache/datafusion-python/pull/1298) (timsaucer) -- Add Arrow C streaming, DataFrame iteration, and OOM-safe streaming execution [#1222](https://github.com/apache/datafusion-python/pull/1222) (kosiew) -- Add PyCapsule Type Support and Type Hint Enhancements for AggregateUDF in DataFusion Python Bindings [#1277](https://github.com/apache/datafusion-python/pull/1277) (kosiew) -- Add collect_column to dataframe [#1302](https://github.com/apache/datafusion-python/pull/1302) (timsaucer) -- chore: apply cargo fmt with import organization [#1303](https://github.com/apache/datafusion-python/pull/1303) (timsaucer) -- Feat/parameterized sql queries [#964](https://github.com/apache/datafusion-python/pull/964) (timsaucer) -- Upgrade to Datafusion 51 [#1311](https://github.com/apache/datafusion-python/pull/1311) (nuno-faria) -- minor: resolve build errors after latest merge into main [#1325](https://github.com/apache/datafusion-python/pull/1325) (timsaucer) -- Update build workflow link [#1330](https://github.com/apache/datafusion-python/pull/1330) (timsaucer) -- Do not convert pyarrow scalar values to plain python types when passing as `lit` [#1319](https://github.com/apache/datafusion-python/pull/1319) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 12 Tim Saucer - 4 Marko Milenković - 2 Nuno Faria - 2 kosiew - 1 Daniel Mesejo - 1 GCHQDeveloper028 - 1 𝕂 -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- diff --git a/dev/changelog/52.0.0.md b/dev/changelog/52.0.0.md deleted file mode 100644 index 3f848bb47..000000000 --- a/dev/changelog/52.0.0.md +++ /dev/null @@ -1,78 +0,0 @@ - - -# Apache DataFusion Python 52.0.0 Changelog - -This release consists of 26 commits from 9 contributors. See credits at the end of this changelog for more information. - -**Implemented enhancements:** - -- feat: add CatalogProviderList support [#1363](https://github.com/apache/datafusion-python/pull/1363) (timsaucer) -- feat: add support for generating JSON formatted substrait plan [#1376](https://github.com/apache/datafusion-python/pull/1376) (Prathamesh9284) -- feat: add regexp_instr function [#1382](https://github.com/apache/datafusion-python/pull/1382) (mesejo) - -**Fixed bugs:** - -- fix: mangled errors [#1377](https://github.com/apache/datafusion-python/pull/1377) (mesejo) - -**Documentation updates:** - -- docs: Clarify first_value usage in select vs aggregate [#1348](https://github.com/apache/datafusion-python/pull/1348) (AdMub) - -**Other:** - -- Release 51.0.0 [#1333](https://github.com/apache/datafusion-python/pull/1333) (timsaucer) -- Use explicit timer in unit test [#1338](https://github.com/apache/datafusion-python/pull/1338) (timsaucer) -- Add use_fabric_endpoint parameter to MicrosoftAzure class [#1357](https://github.com/apache/datafusion-python/pull/1357) (djouallah) -- Prepare for DF52 release [#1337](https://github.com/apache/datafusion-python/pull/1337) (timsaucer) -- build(deps): bump actions/checkout from 5 to 6 [#1310](https://github.com/apache/datafusion-python/pull/1310) (dependabot[bot]) -- build(deps): bump actions/download-artifact from 5 to 7 [#1321](https://github.com/apache/datafusion-python/pull/1321) (dependabot[bot]) -- build(deps): bump actions/upload-artifact from 4 to 6 [#1322](https://github.com/apache/datafusion-python/pull/1322) (dependabot[bot]) -- build(deps): bump actions/cache from 4 to 5 
[#1323](https://github.com/apache/datafusion-python/pull/1323) (dependabot[bot]) -- Pass Field information back and forth when using scalar UDFs [#1299](https://github.com/apache/datafusion-python/pull/1299) (timsaucer) -- Update dependency minor versions to prepare for DF52 release [#1368](https://github.com/apache/datafusion-python/pull/1368) (timsaucer) -- Improve displayed error by using `DataFusionError`'s `Display` trait [#1370](https://github.com/apache/datafusion-python/pull/1370) (abey79) -- Enforce DataFrame display memory limits with `max_rows` + `min_rows` constraint (deprecate `repr_rows`) [#1367](https://github.com/apache/datafusion-python/pull/1367) (kosiew) -- Implement all CSV reader options [#1361](https://github.com/apache/datafusion-python/pull/1361) (timsaucer) -- chore: add confirmation before tarball is released [#1372](https://github.com/apache/datafusion-python/pull/1372) (milenkovicm) -- Build in debug mode for PRs [#1375](https://github.com/apache/datafusion-python/pull/1375) (timsaucer) -- minor: remove ffi test wheel from distribution artifact [#1378](https://github.com/apache/datafusion-python/pull/1378) (timsaucer) -- chore: update rust 2024 edition [#1371](https://github.com/apache/datafusion-python/pull/1371) (timsaucer) -- Fix Python UDAF list-of-timestamps return by enforcing list-valued scalars and caching PyArrow types [#1347](https://github.com/apache/datafusion-python/pull/1347) (kosiew) -- minor: update cargo dependencies [#1383](https://github.com/apache/datafusion-python/pull/1383) (timsaucer) -- chore: bump Python version for RAT checking [#1386](https://github.com/apache/datafusion-python/pull/1386) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
- -``` - 13 Tim Saucer - 4 dependabot[bot] - 2 Daniel Mesejo - 2 kosiew - 1 Adisa Mubarak (AdMub) - 1 Antoine Beyeler - 1 Dhanashri Prathamesh Iranna - 1 Marko Milenković - 1 Mimoune -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - diff --git a/dev/changelog/pre-43.0.0.md b/dev/changelog/pre-43.0.0.md deleted file mode 100644 index ae3a2348a..000000000 --- a/dev/changelog/pre-43.0.0.md +++ /dev/null @@ -1,715 +0,0 @@ - - -# DataFusion Python Changelog - -## [42.0.0](https://github.com/apache/datafusion-python/tree/42.0.0) (2024-10-06) - -This release consists of 20 commits from 6 contributors. See credits at the end of this changelog for more information. - -**Implemented enhancements:** - -- feat: expose between [#868](https://github.com/apache/datafusion-python/pull/868) (mesejo) -- feat: make register_csv accept a list of paths [#883](https://github.com/apache/datafusion-python/pull/883) (mesejo) -- feat: expose http object store [#885](https://github.com/apache/datafusion-python/pull/885) (mesejo) - -**Fixed bugs:** - -- fix: Calling `count` on a pyarrow dataset results in an error [#843](https://github.com/apache/datafusion-python/pull/843) (Michael-J-Ward) - -**Other:** - -- Upgrade datafusion [#867](https://github.com/apache/datafusion-python/pull/867) (emgeee) -- Feature/aggregates as windows [#871](https://github.com/apache/datafusion-python/pull/871) (timsaucer) -- Fix regression on register_udaf [#878](https://github.com/apache/datafusion-python/pull/878) (timsaucer) -- build(deps): upgrade setup-protoc action and protoc version number [#873](https://github.com/apache/datafusion-python/pull/873) (Michael-J-Ward) -- build(deps): bump prost-types from 0.13.2 to 0.13.3 [#881](https://github.com/apache/datafusion-python/pull/881) (dependabot[bot]) -- build(deps): bump prost from 0.13.2 to 0.13.3 [#882](https://github.com/apache/datafusion-python/pull/882) 
(dependabot[bot]) -- chore: remove XFAIL from passing tests [#884](https://github.com/apache/datafusion-python/pull/884) (Michael-J-Ward) -- Add user defined window function support [#880](https://github.com/apache/datafusion-python/pull/880) (timsaucer) -- build(deps): bump syn from 2.0.77 to 2.0.79 [#886](https://github.com/apache/datafusion-python/pull/886) (dependabot[bot]) -- fix example of reading parquet from s3 [#896](https://github.com/apache/datafusion-python/pull/896) (sir-sigurd) -- release-testing [#889](https://github.com/apache/datafusion-python/pull/889) (Michael-J-Ward) -- chore(bench): fix create_tables.sql for tpch benchmark [#897](https://github.com/apache/datafusion-python/pull/897) (Michael-J-Ward) -- Add physical and logical plan conversion to and from protobuf [#892](https://github.com/apache/datafusion-python/pull/892) (timsaucer) -- Feature/instance udfs [#890](https://github.com/apache/datafusion-python/pull/890) (timsaucer) -- chore(ci): remove Mambaforge variant from CI [#894](https://github.com/apache/datafusion-python/pull/894) (Michael-J-Ward) -- Use OnceLock to store TokioRuntime [#895](https://github.com/apache/datafusion-python/pull/895) (Michael-J-Ward) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 Michael J Ward - 5 Tim Saucer - 3 Daniel Mesejo - 3 dependabot[bot] - 1 Matt Green - 1 Sergey Fedoseev -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - -## [41.0.0](https://github.com/apache/datafusion-python/tree/41.0.0) (2024-09-09) - -This release consists of 19 commits from 6 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: enable list of paths for read_csv [#824](https://github.com/apache/datafusion-python/pull/824) (mesejo) -- feat: better exception and message for table not found [#851](https://github.com/apache/datafusion-python/pull/851) (mesejo) -- feat: make cast accept built-in Python types [#858](https://github.com/apache/datafusion-python/pull/858) (mesejo) - -**Other:** - -- chore: Prepare for 40.0.0 release [#801](https://github.com/apache/datafusion-python/pull/801) (andygrove) -- Add typing-extensions dependency to pyproject [#805](https://github.com/apache/datafusion-python/pull/805) (timsaucer) -- Upgrade deps to datafusion 41 [#802](https://github.com/apache/datafusion-python/pull/802) (Michael-J-Ward) -- Fix SessionContext init with only SessionConfig [#827](https://github.com/apache/datafusion-python/pull/827) (jcrist) -- build(deps): upgrade actions/{upload,download}-artifact@v3 to v4 [#829](https://github.com/apache/datafusion-python/pull/829) (Michael-J-Ward) -- Run ruff format in CI [#837](https://github.com/apache/datafusion-python/pull/837) (timsaucer) -- Add PyCapsule support for Arrow import and export [#825](https://github.com/apache/datafusion-python/pull/825) (timsaucer) -- Feature/expose when function [#836](https://github.com/apache/datafusion-python/pull/836) (timsaucer) -- Add Window Functions for use with function builder [#808](https://github.com/apache/datafusion-python/pull/808) (timsaucer) -- chore: fix typos [#844](https://github.com/apache/datafusion-python/pull/844) (mesejo) -- build(ci): use proper mac runners [#841](https://github.com/apache/datafusion-python/pull/841) (Michael-J-Ward) -- Set of small features [#839](https://github.com/apache/datafusion-python/pull/839) (timsaucer) -- chore: fix docstrings, typos [#852](https://github.com/apache/datafusion-python/pull/852) (mesejo) -- chore: Use datafusion re-exported dependencies [#856](https://github.com/apache/datafusion-python/pull/856) 
(emgeee) -- add guidelines on separating python and rust code [#860](https://github.com/apache/datafusion-python/pull/860) (Michael-J-Ward) -- Update Aggregate functions to take builder parameters [#859](https://github.com/apache/datafusion-python/pull/859) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 Tim Saucer - 5 Daniel Mesejo - 4 Michael J Ward - 1 Andy Grove - 1 Jim Crist-Harif - 1 Matt Green -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - -## [40.0.0](https://github.com/apache/datafusion-python/tree/40.0.0) (2024-08-09) - -This release consists of 18 commits from 4 contributors. See credits at the end of this changelog for more information. - -- Update changelog for 39.0.0 [#742](https://github.com/apache/datafusion-python/pull/742) (andygrove) -- build(deps): bump uuid from 1.8.0 to 1.9.1 [#744](https://github.com/apache/datafusion-python/pull/744) (dependabot[bot]) -- build(deps): bump mimalloc from 0.1.42 to 0.1.43 [#745](https://github.com/apache/datafusion-python/pull/745) (dependabot[bot]) -- build(deps): bump syn from 2.0.67 to 2.0.68 [#746](https://github.com/apache/datafusion-python/pull/746) (dependabot[bot]) -- Tsaucer/find window fn [#747](https://github.com/apache/datafusion-python/pull/747) (timsaucer) -- Python wrapper classes for all user interfaces [#750](https://github.com/apache/datafusion-python/pull/750) (timsaucer) -- Expose array sort [#764](https://github.com/apache/datafusion-python/pull/764) (timsaucer) -- Upgrade protobuf and remove GH Action googletest-installer [#773](https://github.com/apache/datafusion-python/pull/773) (Michael-J-Ward) -- Upgrade Datafusion 40 [#771](https://github.com/apache/datafusion-python/pull/771) (Michael-J-Ward) -- Bugfix: Calling count with None arguments 
[#768](https://github.com/apache/datafusion-python/pull/768) (timsaucer) -- Add in user example that compares a two different approaches to UDFs [#770](https://github.com/apache/datafusion-python/pull/770) (timsaucer) -- Add missing exports for wrapper modules [#782](https://github.com/apache/datafusion-python/pull/782) (timsaucer) -- Add PyExpr to_variant conversions [#793](https://github.com/apache/datafusion-python/pull/793) (Michael-J-Ward) -- Add missing expressions to wrapper export [#795](https://github.com/apache/datafusion-python/pull/795) (timsaucer) -- Doc/cross reference [#791](https://github.com/apache/datafusion-python/pull/791) (timsaucer) -- Re-Enable `num_centroids` to `approx_percentile_cont` [#798](https://github.com/apache/datafusion-python/pull/798) (Michael-J-Ward) -- UDAF process all state variables [#799](https://github.com/apache/datafusion-python/pull/799) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 9 Tim Saucer - 4 Michael J Ward - 3 dependabot[bot] - 2 Andy Grove -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- -## [39.0.0](https://github.com/apache/datafusion-python/tree/39.0.0) (2024-06-25) - -**Merged pull requests:** - -- ci: add substrait feature to linux builds [#720](https://github.com/apache/datafusion-python/pull/720) (Michael-J-Ward) -- Docs deploy action [#721](https://github.com/apache/datafusion-python/pull/721) (Michael-J-Ward) -- update deps [#723](https://github.com/apache/datafusion-python/pull/723) (Michael-J-Ward) -- Upgrade maturin [#725](https://github.com/apache/datafusion-python/pull/725) (Michael-J-Ward) -- Upgrade datafusion 39 [#728](https://github.com/apache/datafusion-python/pull/728) (Michael-J-Ward) -- use ScalarValue::to_pyarrow to convert to python object [#731](https://github.com/apache/datafusion-python/pull/731) (Michael-J-Ward) -- Pyo3 `Bound<'py, T>` api [#734](https://github.com/apache/datafusion-python/pull/734) (Michael-J-Ward) -- github test action: drop python 3.7, add python 3.12 [#736](https://github.com/apache/datafusion-python/pull/736) (Michael-J-Ward) -- Pyarrow filter pushdowns [#735](https://github.com/apache/datafusion-python/pull/735) (Michael-J-Ward) -- build(deps): bump syn from 2.0.66 to 2.0.67 [#738](https://github.com/apache/datafusion-python/pull/738) (dependabot[bot]) -- Pyo3 refactorings [#740](https://github.com/apache/datafusion-python/pull/740) (Michael-J-Ward) -- UDAF `sum` workaround [#741](https://github.com/apache/datafusion-python/pull/741) (Michael-J-Ward) - -## [38.0.1](https://github.com/apache/datafusion-python/tree/38.0.1) (2024-05-25) - -**Implemented enhancements:** - -- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) -- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) - -**Merged pull requests:** - -- Add document about basics of working with expressions [#668](https://github.com/apache/datafusion-python/pull/668) (timsaucer) -- chore: Update Python release 
process now that DataFusion is TLP [#674](https://github.com/apache/datafusion-python/pull/674) (andygrove) -- Fix Docs [#676](https://github.com/apache/datafusion-python/pull/676) (Michael-J-Ward) -- Add examples from TPC-H [#666](https://github.com/apache/datafusion-python/pull/666) (timsaucer) -- fix conda nightly builds, attempt 2 [#689](https://github.com/apache/datafusion-python/pull/689) (Michael-J-Ward) -- Upgrade to datafusion 38 [#691](https://github.com/apache/datafusion-python/pull/691) (Michael-J-Ward) -- chore: update to maturin's recommended project layout for rust/python… [#695](https://github.com/apache/datafusion-python/pull/695) (Michael-J-Ward) -- chore: update cargo deps [#698](https://github.com/apache/datafusion-python/pull/698) (Michael-J-Ward) -- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) -- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) -- Website fixes [#702](https://github.com/apache/datafusion-python/pull/702) (Michael-J-Ward) - -## [37.1.0](https://github.com/apache/datafusion-python/tree/37.1.0) (2024-05-08) - -**Implemented enhancements:** - -- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) - -**Documentation updates:** - -- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) - -**Merged pull requests:** - -- Switch to Ruff for Python linting [#529](https://github.com/apache/datafusion-python/pull/529) (andygrove) -- Remove sql-on-pandas/polars/cudf examples [#602](https://github.com/apache/datafusion-python/pull/602) (andygrove) -- build(deps): bump object_store from 0.9.0 to 0.9.1 [#611](https://github.com/apache/datafusion-python/pull/611) (dependabot[bot]) -- More missing array funcs 
[#605](https://github.com/apache/datafusion-python/pull/605) (judahrand) -- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) -- build(deps): bump uuid from 1.7.0 to 1.8.0 [#615](https://github.com/apache/datafusion-python/pull/615) (dependabot[bot]) -- Bind SQLOptions and relative ctx method #567 [#588](https://github.com/apache/datafusion-python/pull/588) (giacomorebecchi) -- bugfix: no panic on empty table [#613](https://github.com/apache/datafusion-python/pull/613) (mesejo) -- Expose `register_listing_table` [#618](https://github.com/apache/datafusion-python/pull/618) (henrifroese) -- Expose unnest feature [#641](https://github.com/apache/datafusion-python/pull/641) (timsaucer) -- Update domain names and paths in asf yaml [#643](https://github.com/apache/datafusion-python/pull/643) (andygrove) -- use python 3.11 to publish docs [#645](https://github.com/apache/datafusion-python/pull/645) (andygrove) -- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) -- Upgrade Datafusion to v37.1.0 [#669](https://github.com/apache/datafusion-python/pull/669) (Michael-J-Ward) - -## [36.0.0](https://github.com/apache/datafusion-python/tree/36.0.0) (2024-03-02) - -**Implemented enhancements:** - -- feat: Add `flatten` array function [#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) - -**Documentation updates:** - -- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) - -**Merged pull requests:** - -- Allow PyDataFrame to be used from other projects [#582](https://github.com/apache/datafusion-python/pull/582) (andygrove) -- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) -- Add array functions [#560](https://github.com/apache/datafusion-python/pull/560) (ongchi) -- feat: Add `flatten` array function 
[#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) - -## [35.0.0](https://github.com/apache/datafusion-python/tree/35.0.0) (2024-01-20) - -**Merged pull requests:** - -- build(deps): bump syn from 2.0.41 to 2.0.43 [#559](https://github.com/apache/datafusion-python/pull/559) (dependabot[bot]) -- build(deps): bump tokio from 1.35.0 to 1.35.1 [#558](https://github.com/apache/datafusion-python/pull/558) (dependabot[bot]) -- build(deps): bump async-trait from 0.1.74 to 0.1.77 [#556](https://github.com/apache/datafusion-python/pull/556) (dependabot[bot]) -- build(deps): bump pyo3 from 0.20.0 to 0.20.2 [#557](https://github.com/apache/datafusion-python/pull/557) (dependabot[bot]) - -## [34.0.0](https://github.com/apache/datafusion-python/tree/34.0.0) (2023-12-28) - -**Merged pull requests:** - -- Adjust visibility of crate private members & Functions [#537](https://github.com/apache/datafusion-python/pull/537) (jdye64) -- Update json.rst [#538](https://github.com/apache/datafusion-python/pull/538) (ray-andrew) -- Enable mimalloc local_dynamic_tls feature [#540](https://github.com/apache/datafusion-python/pull/540) (jdye64) -- Enable substrait feature to be built by default in CI, for nightlies … [#544](https://github.com/apache/datafusion-python/pull/544) (jdye64) - -## [33.0.0](https://github.com/apache/datafusion-python/tree/33.0.0) (2023-11-16) - -**Merged pull requests:** - -- First pass at getting architectured builds working [#350](https://github.com/apache/datafusion-python/pull/350) (charlesbluca) -- Remove libprotobuf dep [#527](https://github.com/apache/datafusion-python/pull/527) (jdye64) - -## [32.0.0](https://github.com/apache/datafusion-python/tree/32.0.0) (2023-10-21) - -**Implemented enhancements:** - -- feat: expose PyWindowFrame [#509](https://github.com/apache/datafusion-python/pull/509) (dlovell) -- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) -- add 
bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) -- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) -- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) -- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) -- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) -- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) - -**Merged pull requests:** - -- Prepare 31.0.0 release [#500](https://github.com/apache/datafusion-python/pull/500) (andygrove) -- Improve release process documentation [#505](https://github.com/apache/datafusion-python/pull/505) (andygrove) -- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) -- build(deps): bump mimalloc from 0.1.38 to 0.1.39 [#502](https://github.com/apache/datafusion-python/pull/502) (dependabot[bot]) -- build(deps): bump syn from 2.0.32 to 2.0.35 [#503](https://github.com/apache/datafusion-python/pull/503) (dependabot[bot]) -- build(deps): bump syn from 2.0.35 to 2.0.37 [#506](https://github.com/apache/datafusion-python/pull/506) (dependabot[bot]) -- Use latest DataFusion [#511](https://github.com/apache/datafusion-python/pull/511) (andygrove) -- add bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) -- use DataFusion 32 [#515](https://github.com/apache/datafusion-python/pull/515) (andygrove) -- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) -- build(deps): bump regex-syntax from 0.7.5 to 0.8.1 [#517](https://github.com/apache/datafusion-python/pull/517) (dependabot[bot]) -- build(deps): bump pyo3-build-config from 
0.19.2 to 0.20.0 [#516](https://github.com/apache/datafusion-python/pull/516) (dependabot[bot]) -- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) -- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) -- build(deps): bump rustix from 0.38.18 to 0.38.19 [#523](https://github.com/apache/datafusion-python/pull/523) (dependabot[bot]) -- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) -- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) -- Small clippy fix [#524](https://github.com/apache/datafusion-python/pull/524) (andygrove) - -## [31.0.0](https://github.com/apache/datafusion-python/tree/31.0.0) (2023-09-12) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/28.0.0...31.0.0) - -**Implemented enhancements:** - -- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) -- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) -- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) -- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) -- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) -- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) -- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) - -**Documentation updates:** - -- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) - -**Merged pull requests:** - -- Build Linux aarch64 wheel 
[#443](https://github.com/apache/datafusion-python/pull/443) (gokselk) -- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) -- enhancement(docs): Add user guide (#432) [#445](https://github.com/apache/datafusion-python/pull/445) (mesejo) -- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) -- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) -- Upgrade to DF 28.0.0-rc1 [#457](https://github.com/apache/datafusion-python/pull/457) (andygrove) -- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) -- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) -- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) -- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) -- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) -- build(deps): bump arduino/setup-protoc from 1 to 2 [#452](https://github.com/apache/datafusion-python/pull/452) (dependabot[bot]) -- Revert "build(deps): bump arduino/setup-protoc from 1 to 2 (#452)" [#474](https://github.com/apache/datafusion-python/pull/474) (viirya) -- Minor: fix wrongly copied function description [#497](https://github.com/apache/datafusion-python/pull/497) (viirya) -- Upgrade to Datafusion 31.0.0 [#491](https://github.com/apache/datafusion-python/pull/491) (judahrand) -- Add `isnan` and `iszero` [#495](https://github.com/apache/datafusion-python/pull/495) (judahrand) - -## 30.0.0 - -- Skipped due to a breaking change in DataFusion - -## 29.0.0 - -- Skipped - -## [28.0.0](https://github.com/apache/datafusion-python/tree/28.0.0) (2023-07-25) - -**Implemented 
enhancements:** - -- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) - -**Merged pull requests:** - -- File based input utils [#433](https://github.com/apache/datafusion-python/pull/433) (jdye64) -- Upgrade to 28.0.0-rc1 [#434](https://github.com/apache/datafusion-python/pull/434) (andygrove) -- Introduces utility for obtaining SqlTable information from a file like location [#398](https://github.com/apache/datafusion-python/pull/398) (jdye64) -- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) -- Use DataFusion 28 [#439](https://github.com/apache/datafusion-python/pull/439) (andygrove) - -## [27.0.0](https://github.com/apache/datafusion-python/tree/27.0.0) (2023-07-03) - -**Merged pull requests:** - -- LogicalPlan.to_variant() make public [#412](https://github.com/apache/datafusion-python/pull/412) (jdye64) -- Prepare 27.0.0 release [#423](https://github.com/apache/datafusion-python/pull/423) (andygrove) - -## [26.0.0](https://github.com/apache/datafusion-python/tree/26.0.0) (2023-06-11) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/25.0.0...26.0.0) - -**Merged pull requests:** - -- Add Expr::Case when_then_else support to rex_call_operands function [#388](https://github.com/apache/datafusion-python/pull/388) (jdye64) -- Introduce BaseSessionContext abstract class [#390](https://github.com/apache/datafusion-python/pull/390) (jdye64) -- CRUD Schema support for `BaseSessionContext` [#392](https://github.com/apache/datafusion-python/pull/392) (jdye64) -- CRUD Table support for `BaseSessionContext` [#394](https://github.com/apache/datafusion-python/pull/394) (jdye64) - -## [25.0.0](https://github.com/apache/datafusion-python/tree/25.0.0) (2023-05-23) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/24.0.0...25.0.0) - -**Merged pull requests:** - -- Prepare 24.0.0 Release 
[#376](https://github.com/apache/datafusion-python/pull/376) (andygrove) -- build(deps): bump uuid from 1.3.1 to 1.3.2 [#359](https://github.com/apache/datafusion-python/pull/359) (dependabot[bot]) -- build(deps): bump mimalloc from 0.1.36 to 0.1.37 [#361](https://github.com/apache/datafusion-python/pull/361) (dependabot[bot]) -- build(deps): bump regex-syntax from 0.6.29 to 0.7.1 [#334](https://github.com/apache/datafusion-python/pull/334) (dependabot[bot]) -- upgrade maturin to 0.15.1 [#379](https://github.com/apache/datafusion-python/pull/379) (Jimexist) -- Expand Expr to include RexType basic support [#378](https://github.com/apache/datafusion-python/pull/378) (jdye64) -- Add Python script for generating changelog [#383](https://github.com/apache/datafusion-python/pull/383) (andygrove) - -## [24.0.0](https://github.com/apache/datafusion-python/tree/24.0.0) (2023-05-09) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/23.0.0...24.0.0) - -**Documentation updates:** - -- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) - -**Merged pull requests:** - -- Add interface to serialize Substrait plans to Python Bytes. [#344](https://github.com/apache/datafusion-python/pull/344) (kylebrooks-8451) -- Add partition_count property to ExecutionPlan. [#346](https://github.com/apache/datafusion-python/pull/346) (kylebrooks-8451) -- Remove unsendable from all Rust pyclass types. [#348](https://github.com/apache/datafusion-python/pull/348) (kylebrooks-8451) -- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) -- Fix SessionContext execute. 
[#353](https://github.com/apache/datafusion-python/pull/353) (kylebrooks-8451) -- Pub mod expr in lib.rs [#357](https://github.com/apache/datafusion-python/pull/357) (jdye64) -- Add benchmark derived from TPC-H [#355](https://github.com/apache/datafusion-python/pull/355) (andygrove) -- Add db-benchmark [#365](https://github.com/apache/datafusion-python/pull/365) (andygrove) -- First pass of documentation in mdBook [#364](https://github.com/apache/datafusion-python/pull/364) (MrPowers) -- Add 'pub' and '#[pyo3(get, set)]' to DataTypeMap [#371](https://github.com/apache/datafusion-python/pull/371) (jdye64) -- Fix db-benchmark [#369](https://github.com/apache/datafusion-python/pull/369) (andygrove) -- Docs explaining how to view query plans [#373](https://github.com/apache/datafusion-python/pull/373) (andygrove) -- Improve db-benchmark [#372](https://github.com/apache/datafusion-python/pull/372) (andygrove) -- Make expr member of PyExpr public [#375](https://github.com/apache/datafusion-python/pull/375) (jdye64) - -## [23.0.0](https://github.com/apache/datafusion-python/tree/23.0.0) (2023-04-23) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/22.0.0...23.0.0) - -**Merged pull requests:** - -- Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/datafusion-python/pull/321) (andygrove) -- Osx build linker args [#330](https://github.com/apache/datafusion-python/pull/330) (jdye64) -- Add requirements file for python 3.11 [#332](https://github.com/apache/datafusion-python/pull/332) (r4ntix) -- mac arm64 build [#338](https://github.com/apache/datafusion-python/pull/338) (andygrove) -- Add conda.yaml baseline workflow file [#281](https://github.com/apache/datafusion-python/pull/281) (jdye64) -- Prepare for 23.0.0 release [#335](https://github.com/apache/datafusion-python/pull/335) (andygrove) -- Reuse the Tokio Runtime [#341](https://github.com/apache/datafusion-python/pull/341) (kylebrooks-8451) - -## 
[22.0.0](https://github.com/apache/datafusion-python/tree/22.0.0) (2023-04-10) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/21.0.0...22.0.0) - -**Merged pull requests:** - -- Fix invalid build yaml [#308](https://github.com/apache/datafusion-python/pull/308) (andygrove) -- Try fix release build [#309](https://github.com/apache/datafusion-python/pull/309) (andygrove) -- Fix release build [#310](https://github.com/apache/datafusion-python/pull/310) (andygrove) -- Enable datafusion-substrait protoc feature, to remove compile-time dependency on protoc [#312](https://github.com/apache/datafusion-python/pull/312) (andygrove) -- Fix Mac/Win release builds in CI [#313](https://github.com/apache/datafusion-python/pull/313) (andygrove) -- install protoc in docs workflow [#314](https://github.com/apache/datafusion-python/pull/314) (andygrove) -- Fix documentation generation in CI [#315](https://github.com/apache/datafusion-python/pull/315) (andygrove) -- Source wheel fix [#319](https://github.com/apache/datafusion-python/pull/319) (andygrove) - -## [21.0.0](https://github.com/apache/datafusion-python/tree/21.0.0) (2023-03-30) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/20.0.0...21.0.0) - -**Merged pull requests:** - -- minor: Fix minor warning on unused import [#289](https://github.com/apache/datafusion-python/pull/289) (viirya) -- feature: Implement `describe()` method [#293](https://github.com/apache/datafusion-python/pull/293) (simicd) -- fix: Printed results not visible in debugger & notebooks [#296](https://github.com/apache/datafusion-python/pull/296) (simicd) -- add package.include and remove wildcard dependency [#295](https://github.com/apache/datafusion-python/pull/295) (andygrove) -- Update main branch name in docs workflow [#303](https://github.com/apache/datafusion-python/pull/303) (andygrove) -- Upgrade to DF 21 [#301](https://github.com/apache/datafusion-python/pull/301) (andygrove) - -## 
[20.0.0](https://github.com/apache/datafusion-python/tree/20.0.0) (2023-03-17) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0...20.0.0) - -**Implemented enhancements:** - -- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) -- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) -- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) -- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) -- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) -- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) -- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) -- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) -- Add bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) -- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) -- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) -- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) -- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) -- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) -- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) -- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) -- Extension bindings 
[#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) -- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) -- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) -- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) -- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) -- Distinct bindings [#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) -- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) -- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) -- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) - -**Documentation updates:** - -- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) - -**Merged pull requests:** - -- Minor docs updates [#210](https://github.com/apache/datafusion-python/pull/210) (andygrove) -- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) -- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) -- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) -- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) -- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) -- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) -- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) -- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) -- Add 
bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) -- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) -- Pre-commit hooks [#228](https://github.com/apache/datafusion-python/pull/228) (jdye64) -- Implement new release process [#149](https://github.com/apache/datafusion-python/pull/149) (andygrove) -- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) -- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) -- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) -- Fix release instructions [#238](https://github.com/apache/datafusion-python/pull/238) (andygrove) -- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) -- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) -- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) -- Upgrade to DataFusion 19 [#262](https://github.com/apache/datafusion-python/pull/262) (andygrove) -- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) -- Extension bindings [#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) -- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) -- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) -- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) -- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) -- Distinct bindings 
[#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) -- build(deps): bump actions/checkout from 2 to 3 [#244](https://github.com/apache/datafusion-python/pull/244) (dependabot[bot]) -- build(deps): bump actions/upload-artifact from 2 to 3 [#245](https://github.com/apache/datafusion-python/pull/245) (dependabot[bot]) -- build(deps): bump actions/download-artifact from 2 to 3 [#243](https://github.com/apache/datafusion-python/pull/243) (dependabot[bot]) -- Use DataFusion 20 [#278](https://github.com/apache/datafusion-python/pull/278) (andygrove) -- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) -- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) -- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) - -## [0.8.0](https://github.com/apache/datafusion-python/tree/0.8.0) (2023-02-22) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0-rc1...0.8.0) - -**Implemented enhancements:** - -- Add support for cuDF physical execution engine [\#202](https://github.com/apache/datafusion-python/issues/202) -- Make it easier to create a Pandas dataframe from DataFusion query results [\#139](https://github.com/apache/datafusion-python/issues/139) - -**Fixed bugs:** - -- Build error: could not compile `thiserror` due to 2 previous errors [\#69](https://github.com/apache/datafusion-python/issues/69) - -**Closed issues:** - -- Integrate with the new `object_store` crate [\#22](https://github.com/apache/datafusion-python/issues/22) - -**Merged pull requests:** - -- Update README in preparation for 0.8 release [\#206](https://github.com/apache/datafusion-python/pull/206) ([andygrove](https://github.com/andygrove)) -- Add support for cudf as a physical execution engine [\#205](https://github.com/apache/datafusion-python/pull/205) 
([jdye64](https://github.com/jdye64)) -- Run `maturin develop` instead of `cargo build` in verification script [\#200](https://github.com/apache/datafusion-python/pull/200) ([andygrove](https://github.com/andygrove)) -- Add tests for recently added functionality [\#199](https://github.com/apache/datafusion-python/pull/199) ([andygrove](https://github.com/andygrove)) -- Implement `to_pandas()` [\#197](https://github.com/apache/datafusion-python/pull/197) ([simicd](https://github.com/simicd)) -- Add Python wrapper for LogicalPlan::Sort [\#196](https://github.com/apache/datafusion-python/pull/196) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Aggregate [\#195](https://github.com/apache/datafusion-python/pull/195) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Limit [\#193](https://github.com/apache/datafusion-python/pull/193) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Filter [\#192](https://github.com/apache/datafusion-python/pull/192) ([andygrove](https://github.com/andygrove)) -- Add experimental support for executing SQL with Polars and Pandas [\#190](https://github.com/apache/datafusion-python/pull/190) ([andygrove](https://github.com/andygrove)) -- Update changelog for 0.8 release [\#188](https://github.com/apache/datafusion-python/pull/188) ([andygrove](https://github.com/andygrove)) -- Add ability to execute ExecutionPlan and get a stream of RecordBatch [\#186](https://github.com/apache/datafusion-python/pull/186) ([andygrove](https://github.com/andygrove)) -- Dffield bindings [\#185](https://github.com/apache/datafusion-python/pull/185) ([jdye64](https://github.com/jdye64)) -- Add bindings for DFSchema [\#183](https://github.com/apache/datafusion-python/pull/183) ([jdye64](https://github.com/jdye64)) -- test: Window functions [\#182](https://github.com/apache/datafusion-python/pull/182) ([simicd](https://github.com/simicd)) -- Add bindings for 
Projection [\#180](https://github.com/apache/datafusion-python/pull/180) ([jdye64](https://github.com/jdye64)) -- Table scan bindings [\#178](https://github.com/apache/datafusion-python/pull/178) ([jdye64](https://github.com/jdye64)) -- Make session configurable [\#176](https://github.com/apache/datafusion-python/pull/176) ([andygrove](https://github.com/andygrove)) -- Upgrade to DataFusion 18.0.0 [\#175](https://github.com/apache/datafusion-python/pull/175) ([andygrove](https://github.com/andygrove)) -- Use latest DataFusion rev in preparation for DF 18 release [\#174](https://github.com/apache/datafusion-python/pull/174) ([andygrove](https://github.com/andygrove)) -- Arrow type bindings [\#173](https://github.com/apache/datafusion-python/pull/173) ([jdye64](https://github.com/jdye64)) -- Pyo3 bump [\#171](https://github.com/apache/datafusion-python/pull/171) ([jdye64](https://github.com/jdye64)) -- feature: Add additional aggregation functions [\#170](https://github.com/apache/datafusion-python/pull/170) ([simicd](https://github.com/simicd)) -- Make from_substrait_plan return DataFrame instead of LogicalPlan [\#164](https://github.com/apache/datafusion-python/pull/164) ([andygrove](https://github.com/andygrove)) -- feature: Implement count method [\#163](https://github.com/apache/datafusion-python/pull/163) ([simicd](https://github.com/simicd)) -- CI Fixes [\#162](https://github.com/apache/datafusion-python/pull/162) ([jdye64](https://github.com/jdye64)) -- Upgrade to DataFusion 17 [\#160](https://github.com/apache/datafusion-python/pull/160) ([andygrove](https://github.com/andygrove)) -- feature: Improve string representation of datafusion classes [\#159](https://github.com/apache/datafusion-python/pull/159) ([simicd](https://github.com/simicd)) -- Make PyExecutionPlan.plan public [\#156](https://github.com/apache/datafusion-python/pull/156) ([andygrove](https://github.com/andygrove)) -- Expose methods on logical and execution plans 
[\#155](https://github.com/apache/datafusion-python/pull/155) ([andygrove](https://github.com/andygrove)) -- Fix clippy for new Rust version [\#154](https://github.com/apache/datafusion-python/pull/154) ([andygrove](https://github.com/andygrove)) -- Add DataFrame methods for accessing plans [\#153](https://github.com/apache/datafusion-python/pull/153) ([andygrove](https://github.com/andygrove)) -- Use DataFusion rev 5238e8c97f998b4d2cb9fab85fb182f325a1a7fb [\#150](https://github.com/apache/datafusion-python/pull/150) ([andygrove](https://github.com/andygrove)) -- build\(deps\): bump async-trait from 0.1.61 to 0.1.62 [\#148](https://github.com/apache/datafusion-python/pull/148) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Rename default branch from master to main [\#147](https://github.com/apache/datafusion-python/pull/147) ([andygrove](https://github.com/andygrove)) -- Substrait bindings [\#145](https://github.com/apache/datafusion-python/pull/145) ([jdye64](https://github.com/jdye64)) -- build\(deps\): bump uuid from 0.8.2 to 1.2.2 [\#143](https://github.com/apache/datafusion-python/pull/143) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Prepare for 0.8.0 release [\#141](https://github.com/apache/datafusion-python/pull/141) ([andygrove](https://github.com/andygrove)) -- Improve README and add more examples [\#137](https://github.com/apache/datafusion-python/pull/137) ([andygrove](https://github.com/andygrove)) -- test: Expand tests for built-in functions [\#129](https://github.com/apache/datafusion-python/pull/129) ([simicd](https://github.com/simicd)) -- build\(deps\): bump object_store from 0.5.2 to 0.5.3 [\#126](https://github.com/apache/datafusion-python/pull/126) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump mimalloc from 0.1.32 to 0.1.34 [\#125](https://github.com/apache/datafusion-python/pull/125) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Introduce conda directory containing 
datafusion-dev.yaml conda enviro… [\#124](https://github.com/apache/datafusion-python/pull/124) ([jdye64](https://github.com/jdye64)) -- build\(deps\): bump bzip2 from 0.4.3 to 0.4.4 [\#121](https://github.com/apache/datafusion-python/pull/121) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump tokio from 1.23.0 to 1.24.1 [\#119](https://github.com/apache/datafusion-python/pull/119) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump async-trait from 0.1.60 to 0.1.61 [\#118](https://github.com/apache/datafusion-python/pull/118) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Upgrade to DataFusion 16.0.0 [\#115](https://github.com/apache/datafusion-python/pull/115) ([andygrove](https://github.com/andygrove)) -- Bump async-trait from 0.1.57 to 0.1.60 [\#114](https://github.com/apache/datafusion-python/pull/114) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump object_store from 0.5.1 to 0.5.2 [\#112](https://github.com/apache/datafusion-python/pull/112) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump tokio from 1.21.2 to 1.23.0 [\#109](https://github.com/apache/datafusion-python/pull/109) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add entries for publishing production \(asf-site\) and staging docs [\#107](https://github.com/apache/datafusion-python/pull/107) ([martin-g](https://github.com/martin-g)) -- Add a workflow that builds the docs and deploys them at staged or production [\#104](https://github.com/apache/datafusion-python/pull/104) ([martin-g](https://github.com/martin-g)) -- Upgrade to DataFusion 15.0.0 [\#103](https://github.com/apache/datafusion-python/pull/103) ([andygrove](https://github.com/andygrove)) -- build\(deps\): bump futures from 0.3.24 to 0.3.25 [\#102](https://github.com/apache/datafusion-python/pull/102) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump pyo3 from 0.17.2 to 0.17.3 
[\#101](https://github.com/apache/datafusion-python/pull/101) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump mimalloc from 0.1.30 to 0.1.32 [\#98](https://github.com/apache/datafusion-python/pull/98) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump rand from 0.7.3 to 0.8.5 [\#97](https://github.com/apache/datafusion-python/pull/97) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix GitHub actions warnings [\#95](https://github.com/apache/datafusion-python/pull/95) ([martin-g](https://github.com/martin-g)) -- Fixes \#81 - Add CI workflow for source distribution [\#93](https://github.com/apache/datafusion-python/pull/93) ([martin-g](https://github.com/martin-g)) -- post-release updates [\#91](https://github.com/apache/datafusion-python/pull/91) ([andygrove](https://github.com/andygrove)) -- Build for manylinux 2014 [\#88](https://github.com/apache/datafusion-python/pull/88) ([martin-g](https://github.com/martin-g)) -- update release readme tag [\#86](https://github.com/apache/datafusion-python/pull/86) ([Jimexist](https://github.com/Jimexist)) -- Upgrade Maturin to 0.14.2 [\#85](https://github.com/apache/datafusion-python/pull/85) ([martin-g](https://github.com/martin-g)) -- Update release instructions [\#83](https://github.com/apache/datafusion-python/pull/83) ([andygrove](https://github.com/andygrove)) -- \[Functions\] - Add python function binding to `functions` [\#73](https://github.com/apache/datafusion-python/pull/73) ([francis-du](https://github.com/francis-du)) - -## [0.8.0-rc1](https://github.com/apache/datafusion-python/tree/0.8.0-rc1) (2023-02-17) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0-rc2...0.8.0-rc1) - -**Implemented enhancements:** - -- Add bindings for datafusion_common::DFField [\#184](https://github.com/apache/datafusion-python/issues/184) -- Add bindings for DFSchema/DFSchemaRef 
[\#181](https://github.com/apache/datafusion-python/issues/181) -- Add bindings for datafusion_expr Projection [\#179](https://github.com/apache/datafusion-python/issues/179) -- Add bindings for `TableScan` struct from `datafusion_expr::TableScan` [\#177](https://github.com/apache/datafusion-python/issues/177) -- Add a "mapping" struct for types [\#172](https://github.com/apache/datafusion-python/issues/172) -- Improve string representation of datafusion classes \(dataframe, context, expression, ...\) [\#158](https://github.com/apache/datafusion-python/issues/158) -- Add DataFrame count method [\#151](https://github.com/apache/datafusion-python/issues/151) -- \[REQUEST\] Github Actions Improvements [\#146](https://github.com/apache/datafusion-python/issues/146) -- Change default branch name from master to main [\#144](https://github.com/apache/datafusion-python/issues/144) -- Bump pyo3 to 0.18.0 [\#140](https://github.com/apache/datafusion-python/issues/140) -- Add script for Python linting [\#134](https://github.com/apache/datafusion-python/issues/134) -- Add Python bindings for substrait module [\#132](https://github.com/apache/datafusion-python/issues/132) -- Expand unit tests for built-in functions [\#128](https://github.com/apache/datafusion-python/issues/128) -- support creating arrow-datafusion-python conda environment [\#122](https://github.com/apache/datafusion-python/issues/122) -- Build Python source distribution in GitHub workflow [\#81](https://github.com/apache/datafusion-python/issues/81) -- EPIC: Add all functions to python binding `functions` [\#72](https://github.com/apache/datafusion-python/issues/72) - -**Fixed bugs:** - -- Build is broken [\#161](https://github.com/apache/datafusion-python/issues/161) -- Out of memory when sorting [\#157](https://github.com/apache/datafusion-python/issues/157) -- window_lead test appears to be non-deterministic [\#135](https://github.com/apache/datafusion-python/issues/135) -- Reading csv does not work 
[\#130](https://github.com/apache/datafusion-python/issues/130) -- Github actions produce a lot of warnings [\#94](https://github.com/apache/datafusion-python/issues/94) -- ASF source release tarball has wrong directory name [\#90](https://github.com/apache/datafusion-python/issues/90) -- Python Release Build failing after upgrading to maturin 14.2 [\#87](https://github.com/apache/datafusion-python/issues/87) -- Maturin build hangs on Linux ARM64 [\#84](https://github.com/apache/datafusion-python/issues/84) -- Cannot install on Mac M1 from source tarball from testpypi [\#82](https://github.com/apache/datafusion-python/issues/82) -- ImportPathMismatchError when running pytest locally [\#77](https://github.com/apache/datafusion-python/issues/77) - -**Closed issues:** - -- Publish documentation for Python bindings [\#39](https://github.com/apache/datafusion-python/issues/39) -- Add Python binding for `approx_median` [\#32](https://github.com/apache/datafusion-python/issues/32) -- Release version 0.7.0 [\#7](https://github.com/apache/datafusion-python/issues/7) - -## [0.7.0-rc2](https://github.com/apache/datafusion-python/tree/0.7.0-rc2) (2022-11-26) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0...0.7.0-rc2) - -## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) - -**Merged pull requests:** - -- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) -- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) -- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) -- update readme and changelog 
[\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) -- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) -- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) -- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) -- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) -- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) -- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) -- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) -- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) -- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) -- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) -- update test workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) -- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) -- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) 
([Jimexist](https://github.com/Jimexist)) -- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) -- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) -- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) -- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) -- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) -- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) -- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) - -## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) - -## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) - -## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) - -## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) - -**Closed issues:** - -- Add support for Ballista 
[\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) -- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) - -## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) - -**Closed issues:** - -- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) -- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) -- Why is pandas a requirement? [\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) -- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) -- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) diff --git a/dev/check_crates_patch.py b/dev/check_crates_patch.py deleted file mode 100644 index 74e489e1f..000000000 --- a/dev/check_crates_patch.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Check that no Cargo.toml files contain [patch.crates-io] entries. - -Release builds must not depend on patched crates. During development it is -common to temporarily patch crates-io dependencies, but those patches must -be removed before creating a release. - -An empty [patch.crates-io] section is allowed. -""" - -import sys -from pathlib import Path - -import tomllib - - -def main() -> int: - errors: list[str] = [] - for cargo_toml in sorted(Path().rglob("Cargo.toml")): - if "target" in cargo_toml.parts: - continue - with Path.open(cargo_toml, "rb") as f: - data = tomllib.load(f) - patch = data.get("patch", {}).get("crates-io", {}) - if patch: - errors.append(str(cargo_toml)) - for name, spec in patch.items(): - errors.append(f" {name} = {spec}") - - if errors: - print("ERROR: Release builds must not contain [patch.crates-io] entries.") - print() - for line in errors: - print(line) - print() - print("Remove all [patch.crates-io] entries before creating a release.") - return 1 - - print("OK: No [patch.crates-io] entries found.") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/dev/clean.sh b/dev/clean.sh deleted file mode 100755 index 0d86680e8..000000000 --- a/dev/clean.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# This cleans up the project by removing build artifacts and other generated files. - -# Function to remove a directory and print the action -remove_dir() { - if [ -d "$1" ]; then - echo "Removing directory: $1" - rm -rf "$1" - fi -} - -# Function to remove a file and print the action -remove_file() { - if [ -f "$1" ]; then - echo "Removing file: $1" - rm -f "$1" - fi -} - -# Remove .pytest_cache directory -remove_dir .pytest_cache/ - -# Remove target directory -remove_dir target/ - -# Remove any __pycache__ directories -find python/ -type d -name "__pycache__" -print | while read -r dir; do - remove_dir "$dir" -done - -# Remove pytest-coverage.lcov file -# remove_file .coverage -# remove_file pytest-coverage.lcov - -# Remove rust-coverage.lcov file -# remove_file rust-coverage.lcov - -# Remove pyo3 files -find python/ -type f -name '_internal.*.so' -print | while read -r file; do - remove_file "$file" -done - -echo "Cleanup complete." \ No newline at end of file diff --git a/dev/create_license.py b/dev/create_license.py deleted file mode 100644 index acbf8587c..000000000 --- a/dev/create_license.py +++ /dev/null @@ -1,251 +0,0 @@ -#!/usr/bin/python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This file is a mirror of https://github.com/apache/arrow-datafusion/blob/master/dev/create_license.py - -import json -import subprocess -from pathlib import Path - -data = subprocess.check_output( - [ - "cargo-license", - "--avoid-build-deps", - "--avoid-dev-deps", - "--do-not-bundle", - "--json", - ] -) -data = json.loads(data) - -result = """ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" -result += "\n------------------\n\n" -result += "This software is built and contains the following software:\n\n" -result += "(automatically generated via [cargo-license](https://crates.io/crates/cargo-license))\n\n" -for item in data: - license = item["license"] - name = item["name"] - version = item["version"] - repository = item["repository"] - result += "------------------\n\n" - result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n" - -with Path.open("LICENSE.txt", "w") as f: - f.write(result) diff --git a/dev/python_lint.sh b/dev/python_lint.sh deleted file mode 100755 index 2d867f29d..000000000 --- a/dev/python_lint.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# This script runs all the Rust lints locally the same way the -# DataFusion CI does - -set -e -source .venv/bin/activate -flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503 -black --line-length 79 . diff --git a/dev/release/README.md b/dev/release/README.md deleted file mode 100644 index ed28f4aa6..000000000 --- a/dev/release/README.md +++ /dev/null @@ -1,303 +0,0 @@ - - -# DataFusion Python Release Process - -Development happens on the `main` branch, and most of the time, we depend on DataFusion using GitHub dependencies -rather than using an official release from crates.io. This allows us to pick up new features and bug fixes frequently -by creating PRs to move to a later revision of the code. It also means we can incrementally make updates that are -required due to changes in DataFusion rather than having a large amount of work to do when the next official release -is available. - -When there is a new official release of DataFusion, we update the `main` branch to point to that, update the version -number, and create a new release branch, such as `branch-0.8`. Once this branch is created, we switch the `main` branch -back to using GitHub dependencies. The release activity (such as generating the changelog) can then happen on the -release branch without blocking ongoing development in the `main` branch. - -We can cherry-pick commits from the `main` branch into `branch-0.8` as needed and then create new patch releases -from that branch. 
- -## Detailed Guide - -### Pre-requisites - -Releases can currently only be created by PMC members due to the permissions needed. - -You will need a GitHub Personal Access Token. Follow -[these instructions](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) -to generate one if you do not already have one. - -You will need a PyPI API token. Create one at https://test.pypi.org/manage/account/#api-tokens, setting the “Scope” to -“Entire account”. - -You will also need access to the [datafusion](https://test.pypi.org/project/datafusion/) project on testpypi. - -### Preparing the `main` Branch - -Before creating a new release: - -- We need to ensure that the main branch does not have any GitHub dependencies -- a PR should be created and merged to update the major version number of the project -- A new release branch should be created, such as `branch-0.8` - -## Preparing a Release Candidate - -### Change Log - -We maintain a `CHANGELOG.md` so our users know what has been changed between releases. - -The changelog is generated using a Python script: - -```bash -$ GITHUB_TOKEN= ./dev/release/generate-changelog.py 24.0.0 HEAD 25.0.0 > dev/changelog/25.0.0.md -``` - -This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for -titles starting with `feat:`, `fix:`, or `docs:` . The script will produce output similar to: - -``` -Fetching list of commits between 24.0.0 and HEAD -Fetching pull requests -Categorizing pull requests -Generating changelog content -``` - -### Update the version number - -The only place you should need to update the version is in the root `Cargo.toml`. -After updating the toml file, run `cargo update` to update the cargo lock file. -If you do not want to update all the dependencies, you can instead run `cargo build` -which should only update the version number for `datafusion-python`. 
- -### Tag the Repository - -Commit the changes to the changelog and version. - -Assuming you have set up a remote to the `apache` repository rather than your personal fork, -you need to push a tag to start the CI process for release candidates. The following assumes -the upstream repository is called `apache`. - -```bash -git tag 0.8.0-rc1 -git push apache 0.8.0-rc1 -``` - -### Create a source release - -```bash -./dev/release/create-tarball.sh 0.8.0 1 -``` - -This will also create the email template to send to the mailing list. - -Create a draft email using this content, but do not send until after completing the next step. - -### Publish Python Artifacts to testpypi - -This section assumes some familiarity with publishing Python packages to PyPi. For more information, refer to \ -[this tutorial](https://packaging.python.org/en/latest/tutorials/packaging-projects/#uploading-the-distribution-archives). - -#### Publish Python Wheels to testpypi - -Pushing an `rc` tag to the release branch will cause a GitHub Workflow to run that will build the Python wheels. - -Go to https://github.com/apache/datafusion-python/actions and look for an action named "Python Release Build" -that has run against the pushed tag. - -Click on the action and scroll down to the bottom of the page titled "Artifacts". Download `dist.zip`. It should -contain files such as: - -```text -datafusion-22.0.0-cp37-abi3-macosx_10_7_x86_64.whl -datafusion-22.0.0-cp37-abi3-macosx_11_0_arm64.whl -datafusion-22.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl -datafusion-22.0.0-cp37-abi3-win_amd64.whl -``` - -Upload the wheels to testpypi. - -```bash -unzip dist.zip -python3 -m pip install --upgrade setuptools twine build -python3 -m twine upload --repository testpypi datafusion-22.0.0-cp37-abi3-*.whl -``` - -When prompted for username, enter `__token__`. 
When prompted for a password, enter a valid GitHub Personal Access Token - -#### Publish Python Source Distribution to testpypi - -Download the source tarball created in the previous step, untar it, and run: - -```bash -maturin sdist -``` - -This will create a file named `dist/datafusion-0.7.0.tar.gz`. Upload this to testpypi: - -```bash -python3 -m twine upload --repository testpypi dist/datafusion-0.7.0.tar.gz -``` - -### Run Verify Release Candidate Workflow - -Before sending the vote email, run the manually triggered GitHub Actions workflow -"Verify Release Candidate" and confirm all matrix jobs pass across the OS/architecture matrix -(for example, Linux, macOS, and Windows runners): - -1. Go to https://github.com/apache/datafusion-python/actions/workflows/verify-release-candidate.yml -2. Click "Run workflow" -3. Set `version` to the release version (for example, `52.0.0`) -4. Set `rc_number` to the RC number (for example, `0`) -5. Wait for all jobs to complete successfully - -Include a short note in the vote email template that this workflow was run across all OS/architecture -matrix entries and that all jobs passed. - -```text -Verification note: The manually triggered "Verify Release Candidate" workflow was run for version and rc_number across all configured OS/architecture matrix entries, and all matrix jobs completed successfully. -``` - -### Send the Email - -Send the email to start the vote. 
- -## Verifying a Release - -Releases may be verified using `verify-release-candidate.sh`: - -```bash -git clone https://github.com/apache/datafusion-python.git -dev/release/verify-release-candidate.sh 48.0.0 1 -``` - -Alternatively, one can run unit tests against a testpypi release candidate: - -```bash -# clone a fresh repo -git clone https://github.com/apache/datafusion-python.git -cd datafusion-python - -# checkout the release commit -git fetch --tags -git checkout 40.0.0-rc1 -git submodule update --init --recursive - -# create the env -python3 -m venv .venv -source .venv/bin/activate - -# install release candidate -pip install --extra-index-url https://test.pypi.org/simple/ datafusion==40.0.0 - -# install test dependencies -pip install pytest numpy pytest-asyncio - -# run the tests -pytest --import-mode=importlib python/tests -vv -``` - -Try running one of the examples from the top-level README, or write some custom Python code to query some available -data files. - -## Publishing a Release - -### Publishing Apache Source Release - -Once the vote passes, we can publish the release. - -Create the source release tarball: - -```bash -./dev/release/release-tarball.sh 0.8.0 1 -``` - -### Publishing Rust Crate to crates.io - -Some projects depend on the Rust crate directly, so we publish this to crates.io - -```shell -cargo publish -``` - -### Publishing Python Artifacts to PyPi - -Go to the Test PyPI page of Datafusion, and download -[all published artifacts](https://test.pypi.org/project/datafusion/#files) under `dist-release/` directory. 
Then proceed -uploading them using `twine`: - -```bash -twine upload --repository pypi dist-release/* -``` - -### Publish Python Artifacts to conda-forge - -Pypi packages auto upload to conda-forge via [datafusion feedstock](https://github.com/conda-forge/datafusion-feedstock) - -### Push the Release Tag - -```bash -git checkout 0.8.0-rc1 -git tag 0.8.0 -git push apache 0.8.0 -``` - -### Add the release to Apache Reporter - -Add the release to https://reporter.apache.org/addrelease.html?datafusion with a version name prefixed with `DATAFUSION-PYTHON`, -for example `DATAFUSION-PYTHON-31.0.0`. - -The release information is used to generate a template for a board report (see example from Apache Arrow -[here](https://github.com/apache/arrow/pull/14357)). - -### Delete old RCs and Releases - -See the ASF documentation on [when to archive](https://www.apache.org/legal/release-policy.html#when-to-archive) -for more information. - -#### Deleting old release candidates from `dev` svn - -Release candidates should be deleted once the release is published. - -Get a list of DataFusion release candidates: - -```bash -svn ls https://dist.apache.org/repos/dist/dev/datafusion | grep datafusion-python -``` - -Delete a release candidate: - -```bash -svn delete -m "delete old DataFusion RC" https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-python-7.1.0-rc1/ -``` - -#### Deleting old releases from `release` svn - -Only the latest release should be available. Delete old releases after publishing the new release. 
- -Get a list of DataFusion releases: - -```bash -svn ls https://dist.apache.org/repos/dist/release/datafusion | grep datafusion-python -``` - -Delete a release: - -```bash -svn delete -m "delete old DataFusion release" https://dist.apache.org/repos/dist/release/datafusion/datafusion-python-7.0.0 -``` diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py deleted file mode 100644 index 72a35212e..000000000 --- a/dev/release/check-rat-report.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/python -############################################################################## -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-############################################################################## -import fnmatch -import re -import sys -import xml.etree.ElementTree as ET -from pathlib import Path - -if len(sys.argv) != 3: - sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0]) - sys.exit(1) - -exclude_globs_filename = sys.argv[1] -xml_filename = sys.argv[2] - -globs = [line.strip() for line in Path.open(exclude_globs_filename)] - -tree = ET.parse(xml_filename) -root = tree.getroot() -resources = root.findall("resource") - -all_ok = True -for r in resources: - approvals = r.findall("license-approval") - if not approvals or approvals[0].attrib["name"] == "true": - continue - clean_name = re.sub("^[^/]+/", "", r.attrib["name"]) - excluded = False - for g in globs: - if fnmatch.fnmatch(clean_name, g): - excluded = True - break - if not excluded: - sys.stdout.write( - "NOT APPROVED: %s (%s): %s\n" - % (clean_name, r.attrib["name"], approvals[0].attrib["name"]) - ) - all_ok = False - -if not all_ok: - sys.exit(1) - -print("OK") -sys.exit(0) diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh deleted file mode 100755 index d6ca76561..000000000 --- a/dev/release/create-tarball.sh +++ /dev/null @@ -1,138 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/create-tarball.sh - -# This script creates a signed tarball in -# dev/dist/apache-datafusion-python--.tar.gz and uploads it to -# the "dev" area of the dist.apache.arrow repository and prepares an -# email for sending to the dev@datafusion.apache.org list for a formal -# vote. -# -# See release/README.md for full release instructions -# -# Requirements: -# -# 1. gpg setup for signing and have uploaded your public -# signature to https://pgp.mit.edu/ -# -# 2. Logged into the apache svn server with the appropriate -# credentials -# -# 3. Install the requests python package -# -# -# Based in part on 02-source.sh from apache/arrow -# - -set -e - -SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - echo "ex. $0 4.1.0 2" - exit -fi - -if [[ -z "${GH_TOKEN}" ]]; then - echo "Please set personal github token through GH_TOKEN environment variable" - exit -fi - -version=$1 -rc=$2 -tag="${version}-rc${rc}" - -echo "Attempting to create ${tarball} from tag ${tag}" -release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag}) - -release=apache-datafusion-python-${version} -distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc} -tarname=${release}.tar.gz -tarball=${distdir}/${tarname} -url="https://dist.apache.org/repos/dist/dev/datafusion/${release}-rc${rc}" - -if [ -z "$release_hash" ]; then - echo "Cannot continue: unknown git tag: ${tag}" -fi - -echo "Draft email for dev@datafusion.apache.org mailing list" -echo "" -echo "---------------------------------------------------------" -cat < containing the files in git at $release_hash -# the files in the tarball are prefixed with {version} (e.g. 
4.0.1) -mkdir -p ${distdir} -(cd "${SOURCE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball}) - -echo "Running rat license checker on ${tarball}" -${SOURCE_DIR}/run-rat.sh ${tarball} - -echo "Signing tarball and creating checksums" -gpg --armor --output ${tarball}.asc --detach-sig ${tarball} -# create signing with relative path of tarball -# so that they can be verified with a command such as -# shasum --check apache-datafusion-python-4.1.0-rc2.tar.gz.sha512 -(cd ${distdir} && shasum -a 256 ${tarname}) > ${tarball}.sha256 -(cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512 - - -echo "Uploading to datafusion dist/dev to ${url}" -svn co --depth=empty https://dist.apache.org/repos/dist/dev/datafusion ${SOURCE_TOP_DIR}/dev/dist -svn add ${distdir} -svn ci -m "Apache DataFusion Python ${version} ${rc}" ${distdir} diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py deleted file mode 100755 index d86736773..000000000 --- a/dev/release/generate-changelog.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import re -import subprocess -import sys - -from github import Github - - -def print_pulls(repo_name, title, pulls) -> None: - if len(pulls) > 0: - print(f"**{title}:**") - print() - for pull, commit in pulls: - url = f"https://github.com/{repo_name}/pull/{pull.number}" - print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})") - print() - - -def generate_changelog(repo, repo_name, tag1, tag2, version) -> None: - # get a list of commits between two tags - print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) - comparison = repo.compare(tag1, tag2) - - # get the pull requests for these commits - print("Fetching pull requests", file=sys.stderr) - unique_pulls = [] - all_pulls = [] - for commit in comparison.commits: - pulls = commit.get_pulls() - for pull in pulls: - # there can be multiple commits per PR if squash merge is not being used and - # in this case we should get all the author names, but for now just pick one - if pull.number not in unique_pulls: - unique_pulls.append(pull.number) - all_pulls.append((pull, commit)) - - # we split the pulls into categories - breaking = [] - bugs = [] - docs = [] - enhancements = [] - performance = [] - other = [] - - # categorize the pull requests based on GitHub labels - print("Categorizing pull requests", file=sys.stderr) - for pull, commit in all_pulls: - # see if PR title uses Conventional Commits - cc_type = "" - cc_breaking = "" - parts = re.findall(r"^([a-z]+)(\([a-z]+\))?(!)?:", pull.title) - if len(parts) == 1: - parts_tuple = parts[0] - cc_type = parts_tuple[0] # fix, feat, docs, chore - # cc_scope = parts_tuple[1] # component within project - cc_breaking = parts_tuple[2] == "!" 
- - labels = [label.name for label in pull.labels] - if "api change" in labels or cc_breaking: - breaking.append((pull, commit)) - elif "bug" in labels or cc_type == "fix": - bugs.append((pull, commit)) - elif "performance" in labels or cc_type == "perf": - performance.append((pull, commit)) - elif "enhancement" in labels or cc_type == "feat": - enhancements.append((pull, commit)) - elif "documentation" in labels or cc_type == "docs" or cc_type == "doc": - docs.append((pull, commit)) - else: - other.append((pull, commit)) - - # produce the changelog content - print("Generating changelog content", file=sys.stderr) - - # ASF header - print("""\n""") - - print(f"# Apache DataFusion Python {version} Changelog\n") - - # get the number of commits - commit_count = subprocess.check_output( - f"git log --pretty=oneline {tag1}..{tag2} | wc -l", shell=True, text=True - ).strip() - - # get number of contributors - contributor_count = subprocess.check_output( - f"git shortlog -sn {tag1}..{tag2} | wc -l", shell=True, text=True - ).strip() - - print( - f"This release consists of {commit_count} commits from {contributor_count} contributors. " - f"See credits at the end of this changelog for more information.\n" - ) - - print_pulls(repo_name, "Breaking changes", breaking) - print_pulls(repo_name, "Performance related", performance) - print_pulls(repo_name, "Implemented enhancements", enhancements) - print_pulls(repo_name, "Fixed bugs", bugs) - print_pulls(repo_name, "Documentation updates", docs) - print_pulls(repo_name, "Other", other) - - # show code contributions - credits = subprocess.check_output( - f"git shortlog -sn {tag1}..{tag2}", shell=True, text=True - ).rstrip() - - print("## Credits\n") - print( - "Thank you to everyone who contributed to this release. 
Here is a breakdown of commits (PRs merged) " - "per contributor.\n" - ) - print("```") - print(credits) - print("```\n") - - print( - "Thank you also to everyone who contributed in other ways such as filing issues, reviewing " - "PRs, and providing feedback on this release.\n" - ) - - -def cli(args=None) -> None: - """Process command line arguments.""" - if not args: - args = sys.argv[1:] - - parser = argparse.ArgumentParser() - parser.add_argument("tag1", help="The previous commit or tag (e.g. 0.1.0)") - parser.add_argument("tag2", help="The current commit or tag (e.g. HEAD)") - parser.add_argument( - "version", help="The version number to include in the changelog" - ) - args = parser.parse_args() - - token = os.getenv("GITHUB_TOKEN") - project = "apache/datafusion-python" - - g = Github(token) - repo = g.get_repo(project) - generate_changelog(repo, project, args.tag1, args.tag2, args.version) - - -if __name__ == "__main__": - cli() diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt deleted file mode 100644 index dcd5d9aac..000000000 --- a/dev/release/rat_exclude_files.txt +++ /dev/null @@ -1,50 +0,0 @@ -*.npmrc -*.gitignore -*.dockerignore -.gitmodules -*_generated.h -*_generated.js -*_generated.ts -*.csv -*.json -*.snap -.github/ISSUE_TEMPLATE/*.md -.github/pull_request_template.md -CHANGELOG.md -dev/release/rat_exclude_files.txt -MANIFEST.in -__init__.pxd -__init__.py -*.html -*.sgml -*.css -*.png -*.ico -*.svg -*.devhelp2 -*.scss -.gitattributes -requirements.txt -*requirements*.txt -**/testdata/* -ci/* -**/*.svg -**/*.csv -**/*.json -**/*.sql -venv/* -parquet/* -testing/* -target/* -**/target/* -Cargo.lock -**/Cargo.lock -.history -*rat.txt -*/.git -.github/* -benchmarks/tpch/queries/q*.sql -benchmarks/tpch/create_tables.sql -.cargo/config.toml -**/.cargo/config.toml -uv.lock \ No newline at end of file diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh deleted file mode 100755 index 
2b82d1bac..000000000 --- a/dev/release/release-tarball.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh - -# This script copies a tarball from the "dev" area of the -# dist.apache.arrow repository to the "release" area -# -# This script should only be run after the release has been approved -# by the arrow PMC committee. -# -# See release/README.md for full release instructions -# -# Based in part on post-01-upload.sh from apache/arrow - - -set -e -set -u - -if [ "$#" -ne 2 ]; then - echo "Usage: $0 " - echo "ex. $0 4.1.0 2" - exit -fi - -version=$1 -rc=$2 - -read -r -p "Proceed to release tarball for ${version}-rc${rc}? [y/N]: " answer -answer=${answer:-no} -if [ "${answer}" != "y" ]; then - echo "Cancelled tarball release!" 
- exit 1 -fi - -tmp_dir=tmp-apache-datafusion-python-dist - -echo "Recreate temporary directory: ${tmp_dir}" -rm -rf ${tmp_dir} -mkdir -p ${tmp_dir} - -echo "Clone dev dist repository" -svn \ - co \ - https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-python-${version}-rc${rc} \ - ${tmp_dir}/dev - -echo "Clone release dist repository" -svn co https://dist.apache.org/repos/dist/release/datafusion ${tmp_dir}/release - -echo "Copy ${version}-rc${rc} to release working copy" -release_version=datafusion-python-${version} -mkdir -p ${tmp_dir}/release/${release_version} -cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/ -svn add ${tmp_dir}/release/${release_version} - -echo "Commit release" -svn ci -m "Apache DataFusion Python ${version}" ${tmp_dir}/release - -echo "Clean up" -rm -rf ${tmp_dir} - -echo "Success! The release is available here:" -echo " https://dist.apache.org/repos/dist/release/datafusion/${release_version}" diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh deleted file mode 100755 index 94fa55fbe..000000000 --- a/dev/release/run-rat.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -RAT_VERSION=0.13 - -# download apache rat -if [ ! 
-f apache-rat-${RAT_VERSION}.jar ]; then - curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar -fi - -RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " - -RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) - -# generate the rat report -$RAT $1 > rat.txt -python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt -cat filtered_rat.txt -UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l` - -if [ "0" -eq "${UNAPPROVED}" ]; then - echo "No unapproved licenses" -else - echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt" - exit 1 -fi diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh deleted file mode 100755 index 9591e0335..000000000 --- a/dev/release/verify-release-candidate.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -case $# in - 2) VERSION="$1" - RC_NUMBER="$2" - ;; - *) echo "Usage: $0 X.Y.Z RC_NUMBER" - exit 1 - ;; -esac - -set -e -set -x -set -o pipefail - -SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -DATAFUSION_PYTHON_DIR="$(dirname $(dirname ${SOURCE_DIR}))" -DATAFUSION_PYTHON_DIST_URL='https://dist.apache.org/repos/dist/dev/datafusion' - -download_dist_file() { - curl \ - --silent \ - --show-error \ - --fail \ - --location \ - --remote-name $DATAFUSION_PYTHON_DIST_URL/$1 -} - -download_rc_file() { - download_dist_file apache-datafusion-python-${VERSION}-rc${RC_NUMBER}/$1 -} - -import_gpg_keys() { - download_dist_file KEYS - gpg --import KEYS -} - -if type shasum >/dev/null 2>&1; then - sha256_verify="shasum -a 256 -c" - sha512_verify="shasum -a 512 -c" -else - sha256_verify="sha256sum -c" - sha512_verify="sha512sum -c" -fi - -fetch_archive() { - local dist_name=$1 - download_rc_file ${dist_name}.tar.gz - download_rc_file ${dist_name}.tar.gz.asc - download_rc_file ${dist_name}.tar.gz.sha256 - download_rc_file ${dist_name}.tar.gz.sha512 - verify_dir_artifact_signatures -} - -verify_dir_artifact_signatures() { - # verify the signature and the checksums of each artifact - find . -name '*.asc' | while read sigfile; do - artifact=${sigfile/.asc/} - gpg --verify $sigfile $artifact || exit 1 - - # go into the directory because the checksum files contain only the - # basename of the artifact - pushd $(dirname $artifact) - base_artifact=$(basename $artifact) - ${sha256_verify} $base_artifact.sha256 || exit 1 - ${sha512_verify} $base_artifact.sha512 || exit 1 - popd - done -} - -setup_tempdir() { - cleanup() { - if [ "${TEST_SUCCESS}" = "yes" ]; then - rm -fr "${DATAFUSION_PYTHON_TMPDIR}" - else - echo "Failed to verify release candidate. See ${DATAFUSION_PYTHON_TMPDIR} for details." 
- fi - } - - if [ -z "${DATAFUSION_PYTHON_TMPDIR}" ]; then - # clean up automatically if DATAFUSION_PYTHON_TMPDIR is not defined - DATAFUSION_PYTHON_TMPDIR=$(mktemp -d -t "$1.XXXXX") - trap cleanup EXIT - else - # don't clean up automatically - mkdir -p "${DATAFUSION_PYTHON_TMPDIR}" - fi -} - -test_source_distribution() { - # install rust toolchain - export RUSTUP_HOME=$PWD/test-rustup - export CARGO_HOME=$PWD/test-rustup - - curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path - - # On Unix, rustup creates an env file. On Windows GitHub runners (MSYS bash), - # that file may not exist, so fall back to adding Cargo bin directly. - if [ -f "$CARGO_HOME/env" ]; then - # shellcheck disable=SC1090 - source "$CARGO_HOME/env" - elif [ -f "$RUSTUP_HOME/env" ]; then - # shellcheck disable=SC1090 - source "$RUSTUP_HOME/env" - else - export PATH="$CARGO_HOME/bin:$PATH" - fi - - # build and test rust - - # raises on any formatting errors - rustup component add rustfmt --toolchain stable - cargo fmt --all -- --check - - # Clone testing repositories into the expected location - git clone https://github.com/apache/arrow-testing.git testing - git clone https://github.com/apache/parquet-testing.git parquet-testing - - python3 -m venv .venv - if [ -x ".venv/bin/python" ]; then - VENV_PYTHON=".venv/bin/python" - elif [ -x ".venv/Scripts/python.exe" ]; then - VENV_PYTHON=".venv/Scripts/python.exe" - elif [ -x ".venv/Scripts/python" ]; then - VENV_PYTHON=".venv/Scripts/python" - else - echo "Unable to find python executable in virtual environment" - exit 1 - fi - - "$VENV_PYTHON" -m pip install -U pip - "$VENV_PYTHON" -m pip install -U maturin - "$VENV_PYTHON" -m maturin develop - - #TODO: we should really run tests here as well - #python3 -m pytest - - if ( find -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then - echo "Cargo.toml version should not contain SNAPSHOT for releases" - exit 1 - fi -} - -TEST_SUCCESS=no - -setup_tempdir "datafusion-python-${VERSION}" -echo 
"Working in sandbox ${DATAFUSION_PYTHON_TMPDIR}" -cd ${DATAFUSION_PYTHON_TMPDIR} - -dist_name="apache-datafusion-python-${VERSION}" -import_gpg_keys -fetch_archive ${dist_name} -tar xf ${dist_name}.tar.gz -pushd ${dist_name} - test_source_distribution -popd - -TEST_SUCCESS=yes -echo 'Release candidate looks good!' -exit 0 diff --git a/dev/rust_lint.sh b/dev/rust_lint.sh deleted file mode 100755 index eeb9e2302..000000000 --- a/dev/rust_lint.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# This script runs all the Rust lints locally the same way the -# DataFusion CI does - -set -e -if ! 
command -v cargo-tomlfmt &> /dev/null; then - echo "Installing cargo-tomlfmt using cargo" - cargo install cargo-tomlfmt -fi - -ci/scripts/rust_fmt.sh -ci/scripts/rust_clippy.sh -ci/scripts/rust_toml_fmt.sh diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 6e8a53b6f..000000000 --- a/docs/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -pokemon.csv -yellow_trip_data.parquet -yellow_tripdata_2021-01.parquet - diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 49ebae372..000000000 --- a/docs/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) --fail-on-warning \ No newline at end of file diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 502f1c2a1..000000000 --- a/docs/README.md +++ /dev/null @@ -1,70 +0,0 @@ - - -# DataFusion Documentation - -This folder contains the source content of the [Python API](./source/api). -This is published to https://datafusion.apache.org/python by a GitHub action -when changes are merged to the main branch. - -## Dependencies - -It's recommended to install build dependencies and build the documentation -inside a Python `venv` using `uv`. - -To prepare building the documentation run the following on the root level of the project: - -```bash -# Set up a virtual environment with the documentation dependencies -uv sync --dev --group docs --no-install-package datafusion -``` - -## Build & Preview - -Run the provided script to build the HTML pages. - -```bash -# Build the repository -uv run --no-project maturin develop --uv -# Build the documentation -uv run --no-project docs/build.sh -``` - -The HTML will be generated into a `build` directory in `docs`. - -Preview the site on Linux by running this command. - -```bash -firefox docs/build/html/index.html -``` - -## Release Process - -This documentation is hosted at https://datafusion.apache.org/python - -When the PR is merged to the `main` branch of the DataFusion -repository, a [github workflow](https://github.com/apache/datafusion-python/blob/main/.github/workflows/build.yml) which: - -1. Builds the html content -2. Pushes the html content to the [`asf-site`](https://github.com/apache/datafusion-python/tree/asf-site) branch in this repository. - -The Apache Software Foundation provides https://arrow.apache.org/, -which serves content based on the configuration in -[.asf.yaml](https://github.com/apache/datafusion-python/blob/main/.asf.yaml), -which specifies the target as https://datafusion.apache.org/python. 
diff --git a/docs/build.sh b/docs/build.sh deleted file mode 100755 index f73330323..000000000 --- a/docs/build.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -set -e - -original_dir=$(pwd) -script_dir=$(dirname "$(realpath "$0")") -cd "$script_dir" || exit - -if [ ! -f pokemon.csv ]; then - curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv -fi - -if [ ! -f yellow_tripdata_2021-01.parquet ]; then - curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet -fi - -rm -rf build 2> /dev/null -rm -rf temp 2> /dev/null -mkdir temp -cp -rf source/* temp/ -make SOURCEDIR=`pwd`/temp html - -cd "$original_dir" || exit diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 1ba680232..000000000 --- a/docs/make.bat +++ /dev/null @@ -1,52 +0,0 @@ -@rem Licensed to the Apache Software Foundation (ASF) under one -@rem or more contributor license agreements. See the NOTICE file -@rem distributed with this work for additional information -@rem regarding copyright ownership. 
The ASF licenses this file -@rem to you under the Apache License, Version 2.0 (the -@rem "License"); you may not use this file except in compliance -@rem with the License. You may obtain a copy of the License at -@rem -@rem http://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, -@rem software distributed under the License is distributed on an -@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -@rem KIND, either express or implied. See the License for the -@rem specific language governing permissions and limitations -@rem under the License. - -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd \ No newline at end of file diff --git a/docs/mdbook/README.md b/docs/mdbook/README.md deleted file mode 100644 index 6dae6bc62..000000000 --- a/docs/mdbook/README.md +++ /dev/null @@ -1,33 +0,0 @@ - -# DataFusion Book - -This folder builds a DataFusion user guide using [mdBook](https://github.com/rust-lang/mdBook). - -## Build and run book locally - -Build the latest files with `mdbook build`. - -Open the book locally by running `open book/index.html`. - -## Install mdBook - -Download the `mdbook` binary or run `cargo install mdbook`. 
- -Then manually open it, so you have permissions to run it on your Mac. - -Add it to your path with a command like this so you can easily run the commands: `mv ~/Downloads/mdbook /Users/matthew.powers/.local/bin`. diff --git a/docs/mdbook/book.toml b/docs/mdbook/book.toml deleted file mode 100644 index 089cb9a97..000000000 --- a/docs/mdbook/book.toml +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[book] -authors = ["Apache Arrow "] -language = "en" -multilingual = false -src = "src" -title = "DataFusion Book" diff --git a/docs/mdbook/src/SUMMARY.md b/docs/mdbook/src/SUMMARY.md deleted file mode 100644 index 23467ed4c..000000000 --- a/docs/mdbook/src/SUMMARY.md +++ /dev/null @@ -1,25 +0,0 @@ - -# Summary - -- [Index](./index.md) -- [Installation](./installation.md) -- [Quickstart](./quickstart.md) -- [Usage](./usage/index.md) - - [Create a table](./usage/create-table.md) - - [Query a table](./usage/query-table.md) - - [Viewing Query Plans](./usage/query-plans.md) \ No newline at end of file diff --git a/docs/mdbook/src/images/datafusion-jupyterlab.png b/docs/mdbook/src/images/datafusion-jupyterlab.png deleted file mode 100644 index c4d46884e..000000000 Binary files a/docs/mdbook/src/images/datafusion-jupyterlab.png and /dev/null differ diff --git a/docs/mdbook/src/images/plan.svg b/docs/mdbook/src/images/plan.svg deleted file mode 100644 index 927147985..000000000 --- a/docs/mdbook/src/images/plan.svg +++ /dev/null @@ -1,111 +0,0 @@ - - - - - - -%3 - - -cluster_1 - -LogicalPlan - - -cluster_6 - -Detailed LogicalPlan - - - -2 - -Projection: my_table.a, SUM(my_table.b) - - - -3 - -Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] - - - -2->3 - - - - - -4 - -Filter: my_table.a < Int64(3) - - - -3->4 - - - - - -5 - -TableScan: my_table - - - -4->5 - - - - - -7 - -Projection: my_table.a, SUM(my_table.b) -Schema: [a:Int64;N, SUM(my_table.b):Int64;N] - - - -8 - -Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] -Schema: [a:Int64;N, SUM(my_table.b):Int64;N] - - - -7->8 - - - - - -9 - -Filter: my_table.a < Int64(3) -Schema: [a:Int64;N, b:Int64;N] - - - -8->9 - - - - - -10 - -TableScan: my_table -Schema: [a:Int64;N, b:Int64;N] - - - -9->10 - - - - - diff --git a/docs/mdbook/src/index.md b/docs/mdbook/src/index.md deleted file mode 100644 index 2c1d217f8..000000000 --- a/docs/mdbook/src/index.md +++ /dev/null @@ -1,43 +0,0 @@ - -# DataFusion Book 
- -DataFusion is a blazing fast query engine that lets you run data analyses quickly and reliably. - -DataFusion is written in Rust, but also exposes Python and SQL bindings, so you can easily query data in your language of choice. You don't need to know any Rust to be a happy and productive user of DataFusion. - -DataFusion lets you run queries faster than pandas. Let's compare query runtimes for a 5GB CSV file with 100 million rows of data. - -Take a look at a few rows of the data: - -``` -+-------+-------+--------------+-----+-----+-------+----+----+-----------+ -| id1 | id2 | id3 | id4 | id5 | id6 | v1 | v2 | v3 | -+-------+-------+--------------+-----+-----+-------+----+----+-----------+ -| id016 | id016 | id0000042202 | 15 | 24 | 5971 | 5 | 11 | 37.211254 | -| id039 | id045 | id0000029558 | 40 | 49 | 39457 | 5 | 4 | 48.951141 | -| id047 | id023 | id0000071286 | 68 | 20 | 74463 | 2 | 14 | 60.469241 | -+-------+-------+--------------+-----+-----+-------+----+----+-----------+ -``` - -Suppose you'd like to run the following query: `SELECT id1, sum(v1) AS v1 from the_table GROUP BY id1`. - -If you use pandas, then this query will take 43.6 seconds to execute. - -It only takes DataFusion 9.8 seconds to execute the same query. - -DataFusion is easy to use, powerful, and fast. Let's learn more! diff --git a/docs/mdbook/src/installation.md b/docs/mdbook/src/installation.md deleted file mode 100644 index b29f3b66b..000000000 --- a/docs/mdbook/src/installation.md +++ /dev/null @@ -1,63 +0,0 @@ - -# Installation - -DataFusion is easy to install, just like any other Python library. 
- -## Using uv - -If you do not yet have a virtual environment, create one: - -```bash -uv venv -``` - -You can add datafusion to your virtual environment with the usual: - -```bash -uv pip install datafusion -``` - -Or, to add to a project: - -```bash -uv add datafusion -``` - -## Using pip - -``` bash -pip install datafusion -``` - -## uv & JupyterLab setup - -This section explains how to install DataFusion in a uv environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: - -![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) - -Create a virtual environment with DataFusion, Jupyter, and other useful dependencies and start the desktop application. - -```bash -uv venv -uv pip install datafusion jupyterlab jupyterlab_code_formatter -uv run jupyter lab -``` - -## Examples - -See the [DataFusion Python Examples](https://github.com/apache/arrow-datafusion-python/tree/main/examples) for a variety of Python scripts that show DataFusion in action! diff --git a/docs/mdbook/src/quickstart.md b/docs/mdbook/src/quickstart.md deleted file mode 100644 index bba0b36ae..000000000 --- a/docs/mdbook/src/quickstart.md +++ /dev/null @@ -1,77 +0,0 @@ - -# DataFusion Quickstart - -You can easily query a DataFusion table with the Python API or with pure SQL. - -Let's create a small DataFrame and then run some queries with both APIs. - -Start by creating a DataFrame with four rows of data and two columns: `a` and `b`. - -```python -from datafusion import SessionContext - -ctx = SessionContext() - -df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") -``` - -Let's append a column to this DataFrame that adds columns `a` and `b` with the SQL API. 
- -``` -ctx.sql("select a, b, a + b as sum_a_b from my_table") - -+---+---+---------+ -| a | b | sum_a_b | -+---+---+---------+ -| 1 | 4 | 5 | -| 2 | 5 | 7 | -| 3 | 6 | 9 | -| 1 | 7 | 8 | -+---+---+---------+ -``` - -DataFusion makes it easy to run SQL queries on DataFrames. - -Now let's run the same query with the DataFusion Python API: - -```python -from datafusion import col - -df.select( - col("a"), - col("b"), - col("a") + col("b"), -) -``` - -We get the same result as before: - -``` -+---+---+-------------------------+ -| a | b | my_table.a + my_table.b | -+---+---+-------------------------+ -| 1 | 4 | 5 | -| 2 | 5 | 7 | -| 3 | 6 | 9 | -| 1 | 7 | 8 | -+---+---+-------------------------+ -``` - -DataFusion also allows you to query data with a well-designed Python interface. - -Python users have two great ways to query DataFusion tables. diff --git a/docs/mdbook/src/usage/create-table.md b/docs/mdbook/src/usage/create-table.md deleted file mode 100644 index 98870fac0..000000000 --- a/docs/mdbook/src/usage/create-table.md +++ /dev/null @@ -1,59 +0,0 @@ - -# DataFusion Create Table - -It's easy to create DataFusion tables from a variety of data sources. - -## Create Table from Python Dictionary - -Here's how to create a DataFusion table from a Python dictionary: - -```python -from datafusion import SessionContext - -ctx = SessionContext() - -df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") -``` - -Supplying the `name` parameter is optional. You only need to name the table if you'd like to query it with the SQL API. - -You can also create a DataFrame without a name that can be queried with the Python API: - -```python -from datafusion import SessionContext - -ctx = SessionContext() - -df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}) -``` - -## Create Table from CSV - -You can read a CSV into a DataFusion DataFrame. 
Here's how to read the `G1_1e8_1e2_0_0.csv` file into a table named `csv_1e8`: - -```python -ctx.register_csv("csv_1e8", "G1_1e8_1e2_0_0.csv") -``` - -## Create Table from Parquet - -You can read a Parquet file into a DataFusion DataFrame. Here's how to read the `yellow_tripdata_2021-01.parquet` file into a table named `taxi`. - -```python -ctx.register_table("taxi", "yellow_tripdata_2021-01.parquet") -``` diff --git a/docs/mdbook/src/usage/index.md b/docs/mdbook/src/usage/index.md deleted file mode 100644 index 1ef4406f7..000000000 --- a/docs/mdbook/src/usage/index.md +++ /dev/null @@ -1,25 +0,0 @@ - -# Usage - -This section shows how to create DataFusion DataFrames from a variety of data sources like CSV files and Parquet files. - -You'll learn more about the SQL statements that are supported by DataFusion. - -You'll also learn about the DataFusion's Python API for querying data. - -The documentation will wrap up with a variety of real-world data processing tasks that are well suited for DataFusion. The lightning-fast speed and reliable execution makes DataFusion the best technology for a variety of data processing tasks. diff --git a/docs/mdbook/src/usage/query-plans.md b/docs/mdbook/src/usage/query-plans.md deleted file mode 100644 index a39aa9e42..000000000 --- a/docs/mdbook/src/usage/query-plans.md +++ /dev/null @@ -1,170 +0,0 @@ - - -# DataFusion Query Plans - -DataFusion's `DataFrame` is a wrapper around a query plan. In this chapter we will learn how to view -logical and physical query plans for DataFrames. - -## Sample Data - -Let's go ahead and create a simple DataFrame. You can do this in the Python shell or in a notebook. - -```python -from datafusion import SessionContext - -ctx = SessionContext() - -df = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [4, 5, 6, 7]}, name="my_table") -``` - -## Logical Plan - -Next, let's look at the logical plan for this dataframe. 
- -```python ->>> df.logical_plan() -TableScan: my_table -``` - -The logical plan here consists of a single `TableScan` operator. Let's make a more interesting plan by creating a new -`DataFrame` representing an aggregate query with a filter. - -```python ->>> df = ctx.sql("SELECT a, sum(b) FROM my_table WHERE a < 3 GROUP BY a") -``` - -When we view the plan for this `DataFrame` we can see that there are now four operators in the plan, each -representing a logical transformation of the data. We start with a `TableScan` to read the data, followed by -a `Filter` to filter out rows that do not match the filter expression, then an `Aggregate` is performed. Finally, -a `Projection` is applied to ensure that the order of the final columns matches the `SELECT` part of the SQL query. - -```python ->>> df.logical_plan() -Projection: my_table.a, SUM(my_table.b) - Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] - Filter: my_table.a < Int64(3) - TableScan: my_table -``` - -## Optimized Logical Plan - -DataFusion has a powerful query optimizer which will rewrite query plans to make them more efficient before they are -executed. We can view the output of the optimized by viewint the optimized logical plan. - -```python ->>> df.optimized_logical_plan() -Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]] - Filter: my_table.a < Int64(3) - TableScan: my_table projection=[a, b] -``` - -We can see that there are two key differences compared to the unoptimized logical plan: - -- The `Projection` has been removed because it was redundant in this case (the output of the `Aggregatge` plan - already had the columns in the correct order). -- The `TableScan` now has a projection pushed down so that it only reads the columns required to be able to execute - the query. In this case the table only has two columns and we referenced them both in the query, but this optimization - can be very effective in real-world queries against large tables. 
- -## Physical Plan - -Logical plans provide a representation of "what" the query should do it. Physical plans explain "how" the query -should be executed. - -We can view the physical plan (also known as an execution plan) using the `execution_plan` method. - -```python ->>> df.execution_plan() -AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[SUM(my_table.b)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "a", index: 0 }], 48), input_partitions=48 - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[SUM(my_table.b)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: a@0 < 3 - RepartitionExec: partitioning=RoundRobinBatch(48), input_partitions=1 - MemoryExec: partitions=1, partition_sizes=[1] -``` - -The `TableScan` has now been replaced by a more specific `MemoryExec` for scanning the in-memory data. If we were -querying a CSV file on disk then we would expect to see a `CsvExec` instead. - -This plan has additional operators that were not in the logical plan: - -- `RepartionExec` has been added so that the data can be split into partitions and processed in parallel using - multiple cores. -- `CoalesceBatchesExec` will combine small batches into larger batches to ensure that processing remains efficient. - -The `Aggregate` operator now appears twice. This is because aggregates are performed in a two step process. Data is -aggregated within each partition in parallel and then those results (which could contain duplicate grouping keys) are -combined and the aggregate operations is applied again. - -## Creating Query Plan Diagrams - -DataFusion supports generating query plan diagrams in [DOT format](). - -DOT is a language for describing graphs and there are open source tools such as GraphViz that can render diagrams -from DOT files. - -We can use the following code to generate a DOT file for a logical query plan. 
- -```python ->>> diagram = df.logical_plan().display_graphviz() ->>> with open('plan.dot', 'w') as f: ->>> f.write(diagram) -``` - -If we view the view, we will see the following content. - -``` -// Begin DataFusion GraphViz Plan (see https://graphviz.org) -digraph { - subgraph cluster_1 - { - graph[label="LogicalPlan"] - 2[shape=box label="Projection: my_table.a, SUM(my_table.b)"] - 3[shape=box label="Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]]"] - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back] - 4[shape=box label="Filter: my_table.a < Int64(3)"] - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back] - 5[shape=box label="TableScan: my_table"] - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back] - } - subgraph cluster_6 - { - graph[label="Detailed LogicalPlan"] - 7[shape=box label="Projection: my_table.a, SUM(my_table.b)\nSchema: [a:Int64;N, SUM(my_table.b):Int64;N]"] - 8[shape=box label="Aggregate: groupBy=[[my_table.a]], aggr=[[SUM(my_table.b)]]\nSchema: [a:Int64;N, SUM(my_table.b):Int64;N]"] - 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back] - 9[shape=box label="Filter: my_table.a < Int64(3)\nSchema: [a:Int64;N, b:Int64;N]"] - 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back] - 10[shape=box label="TableScan: my_table\nSchema: [a:Int64;N, b:Int64;N]"] - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back] - } -} -// End DataFusion GraphViz Plan -``` - -We can use GraphViz from the command-line to convert this DOT file into an image. - -```bash -dot -Tsvg plan.dot > plan.svg -``` - -This generates the following diagram: - -![Query Plan Diagram](../images/plan.svg) diff --git a/docs/mdbook/src/usage/query-table.md b/docs/mdbook/src/usage/query-table.md deleted file mode 100644 index 5e4e38001..000000000 --- a/docs/mdbook/src/usage/query-table.md +++ /dev/null @@ -1,125 +0,0 @@ - -# DataFusion Query Table - -DataFusion tables can be queried with SQL or with the Python API. 
- -Let's create a small table and show the different types of queries that can be run. - -```python -df = ctx.from_pydict( - { - "first_name": ["li", "wang", "ron", "amanda"], - "age": [25, 75, 68, 18], - "country": ["china", "china", "us", "us"], - }, - name="some_people", -) -``` - -Here's the data in the table: - -``` -+------------+-----+---------+ -| first_name | age | country | -+------------+-----+---------+ -| li | 25 | china | -| wang | 75 | china | -| ron | 68 | us | -| amanda | 18 | us | -+------------+-----+---------+ -``` - -## DataFusion Filter DataFrame - -Here's how to find all individuals that are older than 65 years old in the data with SQL: - -``` -ctx.sql("select * from some_people where age > 65") - -+------------+-----+---------+ -| first_name | age | country | -+------------+-----+---------+ -| wang | 75 | china | -| ron | 68 | us | -+------------+-----+---------+ -``` - -Here's how to run the same query with Python: - -```python -df.filter(col("age") > lit(65)) -``` - -``` -+------------+-----+---------+ -| first_name | age | country | -+------------+-----+---------+ -| wang | 75 | china | -| ron | 68 | us | -+------------+-----+---------+ -``` - -## DataFusion Select Columns from DataFrame - -Here's how to select the `first_name` and `country` columns from the DataFrame with SQL: - -``` -ctx.sql("select first_name, country from some_people") - - -+------------+---------+ -| first_name | country | -+------------+---------+ -| li | china | -| wang | china | -| ron | us | -| amanda | us | -+------------+---------+ -``` - -Here's how to run the same query with Python: - -```python -df.select(col("first_name"), col("country")) -``` - -``` -+------------+---------+ -| first_name | country | -+------------+---------+ -| li | china | -| wang | china | -| ron | us | -| amanda | us | -+------------+---------+ -``` - -## DataFusion Aggregation Query - -Here's how to run a group by aggregation query: - -``` -ctx.sql("select country, count(*) as 
num_people from some_people group by country") - -+---------+------------+ -| country | num_people | -+---------+------------+ -| china | 2 | -| us | 2 | -+---------+------------+ -``` diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html deleted file mode 100644 index 44deeed25..000000000 --- a/docs/source/_templates/docs-sidebar.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html deleted file mode 100644 index 9f7880049..000000000 --- a/docs/source/_templates/layout.html +++ /dev/null @@ -1,26 +0,0 @@ -{% extends "pydata_sphinx_theme/layout.html" %} - -{# Silence the navbar #} -{% block docs_navbar %} -{% endblock %} - - -{% block footer %} - -
-
- {% for footer_item in theme_footer_items %} - - {% endfor %} - -
-
- -{% endblock %} diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 01813b032..000000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,144 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Documentation generation.""" - -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -# -- Project information ----------------------------------------------------- - -project = "Apache Arrow DataFusion" -copyright = "2019-2024, Apache Software Foundation" -author = "Apache Software Foundation" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.mathjax", - "sphinx.ext.napoleon", - "myst_parser", - "IPython.sphinxext.ipython_directive", - "autoapi.extension", -] - -source_suffix = { - ".rst": "restructuredtext", - ".md": "markdown", -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = [] - -autoapi_dirs = ["../../python"] -autoapi_ignore = ["*tests*"] -autoapi_member_order = "groupwise" -suppress_warnings = ["autoapi.python_import_resolution"] -autoapi_python_class_content = "both" -autoapi_keep_files = False # set to True for debugging generated files - - -def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa: ARG001 - """Decide whether AutoAPI should leave a member out of the generated docs. - - Returns True when the (what, name) pair is in the explicit skip list below; - otherwise returns the incoming skip value unchanged. The app, obj and options - arguments are accepted but not used. - """ - skip_contents = [ - # Re-exports - ("class", "datafusion.DataFrame"), - ("class", "datafusion.SessionContext"), - ("module", "datafusion.common"), - # Duplicate modules (skip module-level docs to avoid duplication) - ("module", "datafusion.col"), - ("module", "datafusion.udf"), - # Deprecated - ("class", "datafusion.substrait.serde"), - ("class", "datafusion.substrait.plan"), - ("class", "datafusion.substrait.producer"), - ("class", "datafusion.substrait.consumer"), - ("method", "datafusion.context.SessionContext.tables"), - ("method", "datafusion.dataframe.DataFrame.unnest_column"), - ] - # Explicitly skip certain members listed above. These are either - # re-exports, duplicate module-level documentation, deprecated - # API surfaces, or private variables that would otherwise appear - # in the generated docs and cause confusing duplication. - # Keeping this explicit list avoids surprising entries in the - # AutoAPI output and gives us a single place to opt-out items - # when we intentionally hide them from the docs. - # NOTE(review): no entry in the list above looks like a private variable - # - confirm whether that part of the wording is stale. - if (what, name) in skip_contents: - skip = True - - return skip - - -def setup(sphinx) -> None: - """Connect autoapi_skip_member_fn to the "autoapi-skip-member" event.""" - sphinx.connect("autoapi-skip-member", autoapi_skip_member_fn) - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes.
-# -html_theme = "pydata_sphinx_theme" - -html_theme_options = {"use_edit_page_button": False, "show_toc_level": 2} - -html_context = { - "github_user": "apache", - "github_repo": "arrow-datafusion-python", - "github_version": "main", - "doc_path": "docs/source", -} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -html_logo = "_static/images/2x_bgwhite_original.png" - -html_css_files = ["theme_overrides.css"] - -html_sidebars = { - "**": ["docs-sidebar.html"], -} - -# tell myst_parser to auto-generate anchor links for headers h1, h2, h3 -myst_heading_anchors = 3 - -# enable nice rendering of checkboxes for the task lists -myst_enable_extensions = ["tasklist"] diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 0ef194afe..000000000 --- a/examples/README.md +++ /dev/null @@ -1,118 +0,0 @@ - - -# DataFusion Python Examples - -Some examples rely on data which can be downloaded from the following site: - -- https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page - -Here is a direct link to the file used in the examples: - -- https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet - -### Creating a SessionContext - -- [Creating a SessionContext](./create-context.py) - -### Executing Queries with DataFusion - -- [Query a Parquet file using SQL](./sql-parquet.py) -- [Query a Parquet file using the DataFrame API](./dataframe-parquet.py) -- [Run a SQL query and store the results in a Pandas DataFrame](./sql-to-pandas.py) -- [Query PyArrow Data](./query-pyarrow-data.py) - -### Running User-Defined Python Code - -- [Register a Python UDF with DataFusion](./python-udf.py) -- [Register a Python UDAF with DataFusion](./python-udaf.py) - -### Substrait Support - -- [Serialize query plans using 
Substrait](./substrait.py) - -### Executing SQL against DataFrame Libraries (Experimental) - -- [Executing SQL on Polars](./sql-on-polars.py) -- [Executing SQL on Pandas](./sql-on-pandas.py) -- [Executing SQL on cuDF](./sql-on-cudf.py) - -## TPC-H Examples - -Within the subdirectory `tpch` there are 22 examples that reproduce queries in -the TPC-H specification. These include realistic data that can be generated at -arbitrary scale and allow the user to see use cases for a variety of data frame -operations. - -In the list below we describe which new operations can be found in the examples. -The queries are designed to be of increasing complexity, so it is recommended to -review them in order. For brevity, the following list does not include operations -found in previous examples. - -- [Convert CSV to Parquet](./tpch/convert_data_to_parquet.py) - - Read from a CSV file where the delimiter is something other than a comma - - Specify schema during CSV reading - - Write to a Parquet file -- [Pricing Summary Report](./tpch/q01_pricing_summary_report.py) - - Aggregation computing the maximum value, average, sum, and number of entries - - Filter data by date and interval - - Sorting -- [Minimum Cost Supplier](./tpch/q02_minimum_cost_supplier.py) - - Window operation to find minimum - - Sorting in descending order -- [Shipping Priority](./tpch/q03_shipping_priority.py) -- [Order Priority Checking](./tpch/q04_order_priority_checking.py) - - Aggregating multiple times in one data frame -- [Local Supplier Volume](./tpch/q05_local_supplier_volume.py) -- [Forecasting Revenue Change](./tpch/q06_forecasting_revenue_change.py) - - Using collect and extracting values as a Python object -- [Volume Shipping](./tpch/q07_volume_shipping.py) - - Finding multiple distinct and mutually exclusive values within one dataframe - - Using `case` and `when` statements -- [Market Share](./tpch/q08_market_share.py) - - The operations in this query are similar to those in the prior examples, but -
it is a more complex example of using filters, joins, and aggregates - - Using left outer joins -- [Product Type Profit Measure](./tpch/q09_product_type_profit_measure.py) - - Extract year from a date -- [Returned Item Reporting](./tpch/q10_returned_item_reporting.py) -- [Important Stock Identification](./tpch/q11_important_stock_identification.py) -- [Shipping Modes and Order](./tpch/q12_ship_mode_order_priority.py) - - Finding non-null values using a boolean operation in a filter - - Case statement with default value -- [Customer Distribution](./tpch/q13_customer_distribution.py) -- [Promotion Effect](./tpch/q14_promotion_effect.py) -- [Top Supplier](./tpch/q15_top_supplier.py) -- [Parts/Supplier Relationship](./tpch/q16_part_supplier_relationship.py) - - Using anti joins - - Using regular expressions (regex) - - Creating arrays of literal values - - Determine if an element exists within an array -- [Small-Quantity-Order Revenue](./tpch/q17_small_quantity_order.py) -- [Large Volume Customer](./tpch/q18_large_volume_customer.py) -- [Discounted Revenue](./tpch/q19_discounted_revenue.py) - - Creating a user defined function (UDF) - - Convert pyarrow Array to python values - - Filtering based on a UDF -- [Potential Part Promotion](./tpch/q20_potential_part_promotion.py) - - Extracting part of a string using substr -- [Suppliers Who Kept Orders Waiting](./tpch/q21_suppliers_kept_orders_waiting.py) - - Using array aggregation - - Determining the size of array elements -- [Global Sales Opportunity](./tpch/q22_global_sales_opportunity.py) diff --git a/examples/chart.png b/examples/chart.png deleted file mode 100644 index 743583e06..000000000 Binary files a/examples/chart.png and /dev/null differ diff --git a/examples/create-context.py b/examples/create-context.py deleted file mode 100644 index 0026d6162..000000000 --- a/examples/create-context.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license 
agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext - -# create a session context with default settings -ctx = SessionContext() -print(ctx) - -# create a session context with explicit runtime and config settings -runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) -config = ( - SessionConfig() - .with_create_default_catalog_and_schema(enabled=True) - .with_default_catalog_and_schema("foo", "bar") - .with_target_partitions(8) - .with_information_schema(enabled=True) - .with_repartition_joins(enabled=False) - .with_repartition_aggregations(enabled=False) - .with_repartition_windows(enabled=False) - .with_parquet_pruning(enabled=False) - .set("datafusion.execution.parquet.pushdown_filters", "true") -) -ctx = SessionContext(config, runtime) -print(ctx) - -ctx = ctx.enable_url_table() -print(ctx) diff --git a/examples/csv-read-options.py b/examples/csv-read-options.py deleted file mode 100644 index a5952d950..000000000 --- a/examples/csv-read-options.py +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Example demonstrating CsvReadOptions usage.""" - -from datafusion import CsvReadOptions, SessionContext - -# Create a SessionContext -ctx = SessionContext() - -# Example 1: Using CsvReadOptions with default values -print("Example 1: Default CsvReadOptions") -options = CsvReadOptions() -df = ctx.read_csv("data.csv", options=options) - -# Example 2: Using CsvReadOptions with custom parameters -print("\nExample 2: Custom CsvReadOptions") -options = CsvReadOptions( - has_header=True, - delimiter=",", - quote='"', - schema_infer_max_records=1000, - file_extension=".csv", -) -df = ctx.read_csv("data.csv", options=options) - -# Example 3: Using the builder pattern (recommended for readability) -print("\nExample 3: Builder pattern") -options = ( - CsvReadOptions() - .with_has_header(True) # noqa: FBT003 - .with_delimiter("|") - .with_quote("'") - .with_schema_infer_max_records(500) - .with_truncated_rows(False) # noqa: FBT003 - .with_newlines_in_values(True) # noqa: FBT003 -) -df = ctx.read_csv("data.csv", options=options) - -# Example 4: Advanced options -print("\nExample 4: Advanced options") -options = ( - CsvReadOptions() - .with_has_header(True) # noqa: FBT003 - .with_delimiter(",") - .with_comment("#") # Skip lines starting with # - .with_escape("\\") # Escape character - .with_null_regex(r"^(null|NULL|N/A)$") # Treat these as NULL - .with_truncated_rows(True) # noqa: FBT003 - 
.with_file_compression_type("gzip") # Read gzipped CSV - .with_file_extension(".gz") -) -df = ctx.read_csv("data.csv.gz", options=options) - -# Example 5: Register CSV table with options -print("\nExample 5: Register CSV table") -options = CsvReadOptions().with_has_header(True).with_delimiter(",") # noqa: FBT003 -ctx.register_csv("my_table", "data.csv", options=options) -df = ctx.sql("SELECT * FROM my_table") - -# Example 6: Backward compatibility (without options) -print("\nExample 6: Backward compatibility") -# Still works the old way! -df = ctx.read_csv("data.csv", has_header=True, delimiter=",") - -print("\nAll examples completed!") -print("\nFor all available options, see the CsvReadOptions documentation:") -print(" - has_header: bool") -print(" - delimiter: str") -print(" - quote: str") -print(" - terminator: str | None") -print(" - escape: str | None") -print(" - comment: str | None") -print(" - newlines_in_values: bool") -print(" - schema: pa.Schema | None") -print(" - schema_infer_max_records: int") -print(" - file_extension: str") -print(" - table_partition_cols: list[tuple[str, pa.DataType]]") -print(" - file_compression_type: str") -print(" - file_sort_order: list[list[SortExpr]]") -print(" - null_regex: str | None") -print(" - truncated_rows: bool") diff --git a/examples/dataframe-parquet.py b/examples/dataframe-parquet.py deleted file mode 100644 index 0f2e4b824..000000000 --- a/examples/dataframe-parquet.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from datafusion import SessionContext -from datafusion import functions as f - -ctx = SessionContext() -df = ctx.read_parquet("yellow_tripdata_2021-01.parquet").aggregate( - [f.col("passenger_count")], [f.count_star()] -) -df.show() diff --git a/examples/datafusion-ffi-example/.cargo/config.toml b/examples/datafusion-ffi-example/.cargo/config.toml deleted file mode 100644 index af951327f..000000000 --- a/examples/datafusion-ffi-example/.cargo/config.toml +++ /dev/null @@ -1,5 +0,0 @@ -[target.x86_64-apple-darwin] -rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] - -[target.aarch64-apple-darwin] -rustflags = ["-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup"] diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock deleted file mode 100644 index ede9b446b..000000000 --- a/examples/datafusion-ffi-example/Cargo.lock +++ /dev/null @@ -1,3127 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 4 - -[[package]] -name = "abi_stable" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" -dependencies = [ - "abi_stable_derive", - "abi_stable_shared", - "const_panic", - "core_extensions", - "crossbeam-channel", - "generational-arena", - "libloading", - "lock_api", - "parking_lot", - "paste", - "repr_offset", - "rustc_version", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "abi_stable_derive" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" -dependencies = [ - "abi_stable_shared", - "as_derive_utils", - "core_extensions", - "proc-macro2", - "quote", - "rustc_version", - "syn 1.0.109", - "typed-arena", -] - -[[package]] -name = "abi_stable_shared" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" -dependencies = [ - "core_extensions", -] - -[[package]] -name = "adler2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" - -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "const-random", - "getrandom 0.3.4", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anyhow" -version = "1.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" - -[[package]] -name = "arc-swap" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" -dependencies = [ - "rustversion", -] - -[[package]] -name = "arrow" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - 
"num-traits", -] - -[[package]] -name = "arrow-array" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown 0.16.1", - "num-complex", - "num-integer", - "num-traits", -] - -[[package]] -name = "arrow-buffer" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" -dependencies = [ - "bytes", - "half", - "num-bigint", - "num-traits", -] - -[[package]] -name = "arrow-cast" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", - "atoi", - "base64", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num-traits", - "ryu", -] - -[[package]] -name = "arrow-csv" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" -dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num-integer", - "num-traits", -] - -[[package]] -name = "arrow-ipc" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" -dependencies = [ - "arrow-array", - "arrow-buffer", - 
"arrow-data", - "arrow-schema", - "arrow-select", - "flatbuffers", - "lz4_flex", -] - -[[package]] -name = "arrow-json" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "indexmap", - "itoa", - "lexical-core", - "memchr", - "num-traits", - "ryu", - "serde_core", - "serde_json", - "simdutf8", -] - -[[package]] -name = "arrow-ord" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", -] - -[[package]] -name = "arrow-row" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", -] - -[[package]] -name = "arrow-schema" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" -dependencies = [ - "bitflags", -] - -[[package]] -name = "arrow-select" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num-traits", -] - -[[package]] -name = "arrow-string" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - 
"arrow-select", - "memchr", - "num-traits", - "regex", - "regex-syntax", -] - -[[package]] -name = "as_derive_utils" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" -dependencies = [ - "core_extensions", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "async-ffi" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" -dependencies = [ - "abi_stable", -] - -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bitflags" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" - -[[package]] -name = "brotli" -version = "8.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name 
= "brotli-decompressor" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bumpalo" -version = "3.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c81d250916401487680ed13b8b675660281dcfc3ab0121fe44c94bcab9eae2fb" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" - -[[package]] -name = "cc" -version = "1.2.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" -dependencies = [ - "find-msvc-tools", - "jobserver", - "libc", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" -dependencies = [ - "iana-time-zone", - "num-traits", - "windows-link", -] - -[[package]] -name = "chrono-tz" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" -dependencies = [ - "chrono", - "phf", -] - -[[package]] -name = "comfy-table" -version = "7.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" -dependencies = [ - "unicode-segmentation", - "unicode-width", -] - -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom 0.2.17", - "once_cell", - "tiny-keccak", -] - -[[package]] -name = "const_panic" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" -dependencies = [ - "typewit", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "core_extensions" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bb5e5d0269fd4f739ea6cedaf29c16d81c27a7ce7582008e90eb50dcd57003" -dependencies = [ - "core_extensions_proc_macros", -] - -[[package]] -name = "core_extensions_proc_macros" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" - -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "csv" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde_core", -] - -[[package]] -name = "csv-core" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" -dependencies = [ - "memchr", -] - -[[package]] -name = "dashmap" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "datafusion-catalog" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "tokio", -] - -[[package]] -name = "datafusion-catalog-listing" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - 
"datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "futures", - "itertools", - "log", - "object_store", -] - -[[package]] -name = "datafusion-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "arrow-ipc", - "chrono", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "libc", - "log", - "object_store", - "parquet", - "paste", - "tokio", - "web-time", -] - -[[package]] -name = "datafusion-common-runtime" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "futures", - "log", - "tokio", -] - -[[package]] -name = "datafusion-datasource" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "glob", - "itertools", - "log", - "object_store", - "rand", - "tokio", - "url", -] - -[[package]] -name = "datafusion-datasource-arrow" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-ipc", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - 
"datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "itertools", - "object_store", - "tokio", -] - -[[package]] -name = "datafusion-datasource-csv" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "regex", - "tokio", -] - -[[package]] -name = "datafusion-datasource-json" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "serde_json", - "tokio", - "tokio-stream", -] - -[[package]] -name = "datafusion-datasource-parquet" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "datafusion-session", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "parquet", - "tokio", -] - 
-[[package]] -name = "datafusion-doc" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" - -[[package]] -name = "datafusion-execution" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-buffer", - "async-trait", - "chrono", - "dashmap", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr-common", - "futures", - "log", - "object_store", - "parking_lot", - "rand", - "tempfile", - "url", -] - -[[package]] -name = "datafusion-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", - "indexmap", - "itertools", - "paste", - "serde_json", - "sqlparser", -] - -[[package]] -name = "datafusion-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "indexmap", - "itertools", - "paste", -] - -[[package]] -name = "datafusion-ffi" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "abi_stable", - "arrow", - "arrow-schema", - "async-ffi", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr", - 
"datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-proto", - "datafusion-proto-common", - "datafusion-session", - "futures", - "log", - "prost", - "semver", - "tokio", -] - -[[package]] -name = "datafusion-ffi-example" -version = "0.2.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-ffi", - "datafusion-functions-aggregate", - "datafusion-functions-window", - "pyo3", - "pyo3-build-config", - "pyo3-log", -] - -[[package]] -name = "datafusion-functions" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "arrow-buffer", - "base64", - "chrono", - "chrono-tz", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", - "hex", - "itertools", - "log", - "memchr", - "num-traits", - "rand", - "regex", - "unicode-segmentation", - "uuid", -] - -[[package]] -name = "datafusion-functions-aggregate" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "half", - "log", - "num-traits", - "paste", -] - -[[package]] -name = "datafusion-functions-aggregate-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", -] 
- -[[package]] -name = "datafusion-functions-table" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", - "paste", -] - -[[package]] -name = "datafusion-functions-window" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "log", - "paste", -] - -[[package]] -name = "datafusion-functions-window-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", -] - -[[package]] -name = "datafusion-macros" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "datafusion-doc", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "datafusion-physical-expr" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "parking_lot", - "paste", - "petgraph", - "tokio", -] - -[[package]] -name = 
"datafusion-physical-expr-adapter" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "itertools", -] - -[[package]] -name = "datafusion-physical-expr-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr-common", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "parking_lot", -] - -[[package]] -name = "datafusion-physical-plan" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema", - "async-trait", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "futures", - "half", - "hashbrown 0.16.1", - "indexmap", - "itertools", - "log", - "num-traits", - "parking_lot", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "datafusion-proto" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-datasource", - "datafusion-datasource-arrow", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - 
"datafusion-execution", - "datafusion-expr", - "datafusion-functions-table", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-proto-common", - "object_store", - "prost", - "rand", -] - -[[package]] -name = "datafusion-proto-common" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "prost", -] - -[[package]] -name = "datafusion-pruning" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "itertools", - "log", -] - -[[package]] -name = "datafusion-session" -version = "53.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=35749607f585b3bf25b66b7d2289c56c18d03e4f#35749607f585b3bf25b66b7d2289c56c18d03e4f" -dependencies = [ - "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - 
-[[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - -[[package]] -name = "flatbuffers" -version = "25.12.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" -dependencies = [ - "bitflags", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" -dependencies = [ - "miniz_oxide", - "zlib-rs", -] - -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - -[[package]] -name = "foldhash" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" - -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - 
-[[package]] -name = "futures" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" - -[[package]] -name = "futures-executor" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" - -[[package]] -name = "futures-macro" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "futures-sink" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" - -[[package]] -name = "futures-task" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" - -[[package]] -name = 
"futures-util" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "slab", -] - -[[package]] -name = "generational-arena" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", -] - -[[package]] -name = "getrandom" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", - "wasip3", -] - -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "num-traits", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" - -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "foldhash 0.1.5", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash 0.2.0", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "http" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" -dependencies = [ - "bytes", - "itoa", -] - -[[package]] -name = "humantime" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ 
- "cc", -] - -[[package]] -name = "icu_collections" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" - -[[package]] -name = "icu_properties" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" - -[[package]] -name = "icu_provider" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - 
"zerotrie", - "zerovec", -] - -[[package]] -name = "id-arena" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown 0.16.1", - "serde", - "serde_core", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.4", - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.85" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "leb128fmt" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" - -[[package]] -name = "lexical-core" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" -dependencies = [ - "lexical-parse-integer", - "lexical-util", -] - -[[package]] -name = "lexical-parse-integer" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" -dependencies = [ - "lexical-util", -] - -[[package]] -name = "lexical-util" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" - -[[package]] -name = "lexical-write-float" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" -dependencies = [ - "lexical-util", - "lexical-write-integer", -] - -[[package]] -name = "lexical-write-integer" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" -dependencies = [ - "lexical-util", -] - -[[package]] 
-name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "libm" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" - -[[package]] -name = "linux-raw-sys" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" - -[[package]] -name = "litemap" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" - -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "lz4_flex" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "miniz_oxide" -version = "0.8.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", - "simd-adler32", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "object_store" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "http", - "humantime", - "itertools", - "parking_lot", - "percent-encoding", - "thiserror", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-link", -] - -[[package]] -name = "parquet" -version = "58.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.16.1", - "lz4_flex", - "num-bigint", - "num-integer", - "num-traits", - "object_store", - "paste", - "seq-macro", - "simdutf8", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "petgraph" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" -dependencies = [ - "fixedbitset", - "hashbrown 0.15.5", - "indexmap", - "serde", -] - -[[package]] -name = "phf" 
-version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "potential_utf" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" -dependencies = [ - "zerovec", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn 2.0.117", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" 
-dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-derive" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "pyo3-macros-backend" -version 
= "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "quote" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" -dependencies = [ - "getrandom 0.3.4", -] - -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = 
"0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - -[[package]] -name = "repr_offset" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" -dependencies = [ - "tstr", -] - -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "ryu" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" - -[[package]] -name = "seq-macro" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "simd-adler32" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" - -[[package]] -name = "simdutf8" -version = "0.1.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "slab" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "sqlparser" -version = "0.61.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" -dependencies = [ - "log", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.117" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "tempfile" -version = "3.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" -dependencies = [ - "fastrand", - "getrandom 0.4.1", - "once_cell", - "rustix", - "windows-sys", -] - -[[package]] -name = "thiserror" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - 
-[[package]] -name = "tinystr" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" -dependencies = [ - "displaydoc", - "zerovec", -] - -[[package]] -name = "tokio" -version = "1.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" -dependencies = [ - "bytes", - "pin-project-lite", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "tokio-stream" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", - "tokio-util", -] - -[[package]] -name = "tokio-util" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tracing" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "tracing-core" -version = "0.1.36" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" -dependencies = [ - "once_cell", -] - -[[package]] -name = "tstr" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" -dependencies = [ - "tstr_proc_macros", -] - -[[package]] -name = "tstr_proc_macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" - -[[package]] -name = "twox-hash" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" - -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - -[[package]] -name = "typewit" -version = "1.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "uuid" -version = "1.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" -dependencies = [ - "getrandom 0.4.1", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasip2" -version = "1.0.2+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" -dependencies = [ - "wit-bindgen", -] - -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen", -] - 
-[[package]] -name = "wasm-bindgen" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" -dependencies = [ - "cfg-if", - "futures-util", - "js-sys", - "once_cell", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn 2.0.117", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - 
"wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - 
"windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] - -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn 2.0.117", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn 2.0.117", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - -[[package]] -name = "writeable" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" - -[[package]] -name = "yoke" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" -dependencies = [ 
- "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - -[[package]] -name = "zlib-rs" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" - -[[package]] -name = "zstd" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.16+zstd.1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml deleted file mode 100644 index be6096faf..000000000 --- a/examples/datafusion-ffi-example/Cargo.toml +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-ffi-example" -version = "0.2.0" -edition = "2024" - -[dependencies] -datafusion-catalog = { version = "53", default-features = false } -datafusion-common = { version = "53", default-features = false } -datafusion-functions-aggregate = { version = "53" } -datafusion-functions-window = { version = "53" } -datafusion-expr = { version = "53" } -datafusion-ffi = { version = "53" } - -pyo3 = { version = "0.28", features = [ - "extension-module", - "abi3", - "abi3-py39", -] } -arrow = { version = "58" } -arrow-array = { version = "58" } -arrow-schema = { version = "58" } -async-trait = "0.1.89" -pyo3-log = "0.13.2" - -[build-dependencies] -pyo3-build-config = "0.28" - -[lib] -name = "datafusion_ffi_example" -crate-type = ["cdylib", "rlib"] - -# TODO: remove when datafusion-53 is released -[patch.crates-io] -datafusion-catalog = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-functions-window = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "35749607f585b3bf25b66b7d2289c56c18d03e4f" } -datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = 
"35749607f585b3bf25b66b7d2289c56c18d03e4f" } diff --git a/examples/datafusion-ffi-example/build.rs b/examples/datafusion-ffi-example/build.rs deleted file mode 100644 index 4878d8b0e..000000000 --- a/examples/datafusion-ffi-example/build.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -fn main() { - pyo3_build_config::add_extension_module_link_args(); -} diff --git a/examples/datafusion-ffi-example/pyproject.toml b/examples/datafusion-ffi-example/pyproject.toml deleted file mode 100644 index 7f85e9487..000000000 --- a/examples/datafusion-ffi-example/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[build-system] -requires = ["maturin>=1.6,<2.0"] -build-backend = "maturin" - -[project] -name = "datafusion_ffi_example" -requires-python = ">=3.9" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] -dynamic = ["version"] - -[tool.maturin] -features = ["pyo3/extension-module"] diff --git a/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py b/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py deleted file mode 100644 index 7ea6b295c..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py +++ /dev/null @@ -1,77 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import annotations - -import pyarrow as pa -from datafusion import SessionContext, col, udaf -from datafusion_ffi_example import MySumUDF - - -def setup_context_with_table(): - ctx = SessionContext() - - # Pick numbers here so we get the same value in both groups - # since we cannot be certain of the output order of batches - batch = pa.RecordBatch.from_arrays( - [ - pa.array([1, 2, 3, None], type=pa.int64()), - pa.array([1, 1, 2, 2], type=pa.int64()), - ], - names=["a", "b"], - ) - ctx.register_record_batches("test_table", [[batch]]) - return ctx - - -def test_ffi_aggregate_register(): - ctx = setup_context_with_table() - my_udaf = udaf(MySumUDF()) - ctx.register_udaf(my_udaf) - - result = ctx.sql("select my_custom_sum(a) from test_table group by b").collect() - - assert len(result) == 2 - assert result[0].num_columns == 1 - - result = [r.column(0) for r in result] - expected = [ - pa.array([3], type=pa.int64()), - pa.array([3], type=pa.int64()), - ] - - assert result == expected - - -def test_ffi_aggregate_call_directly(): - ctx = setup_context_with_table() - my_udaf = udaf(MySumUDF()) - - result = ( - ctx.table("test_table").aggregate([col("b")], [my_udaf(col("a"))]).collect() - ) - - assert len(result) == 2 - assert result[0].num_columns == 2 - - result = [r.column(1) for r in result] - expected = [ - pa.array([3], type=pa.int64()), - pa.array([3], type=pa.int64()), - ] - - assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py deleted file mode 100644 index a862b23ba..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import pyarrow as pa -import pyarrow.dataset as ds -import pytest -from datafusion import SessionContext, Table -from datafusion.catalog import Schema -from datafusion_ffi_example import MyCatalogProvider, MyCatalogProviderList - - -def create_test_dataset() -> Table: - """Create a simple test dataset.""" - batch = pa.RecordBatch.from_arrays( - [pa.array([100, 200, 300]), pa.array([1.1, 2.2, 3.3])], - names=["id", "value"], - ) - dataset = ds.dataset([batch]) - return Table(dataset) - - -@pytest.mark.parametrize("inner_capsule", [True, False]) -def test_ffi_catalog_provider_list(inner_capsule: bool) -> None: - """Test basic FFI CatalogProviderList functionality.""" - ctx = SessionContext() - - # Register FFI catalog - catalog_provider_list = MyCatalogProviderList() - if inner_capsule: - catalog_provider_list = ( - catalog_provider_list.__datafusion_catalog_provider_list__(ctx) - ) - - ctx.register_catalog_provider_list(catalog_provider_list) - - # Verify the catalog exists - catalog = ctx.catalog("auto_ffi_catalog") - schema_names = catalog.names() - assert "my_schema" in schema_names - - ctx.register_catalog_provider("second", MyCatalogProvider()) - - assert ctx.catalog_names() == {"auto_ffi_catalog", "second"} - - -@pytest.mark.parametrize("inner_capsule", [True, False]) -def test_ffi_catalog_provider_basic(inner_capsule: bool) -> None: - """Test basic 
FFI CatalogProvider functionality.""" - ctx = SessionContext() - - # Register FFI catalog - catalog_provider = MyCatalogProvider() - if inner_capsule: - catalog_provider = catalog_provider.__datafusion_catalog_provider__(ctx) - - ctx.register_catalog_provider("ffi_catalog", catalog_provider) - - # Verify the catalog exists - catalog = ctx.catalog("ffi_catalog") - schema_names = catalog.names() - assert "my_schema" in schema_names - - # Query the pre-populated table - result = ctx.sql("SELECT * FROM ffi_catalog.my_schema.my_table").collect() - assert len(result) == 2 - assert result[0].num_columns == 2 - - -def test_ffi_catalog_provider_register_schema(): - """Test registering additional schemas to FFI CatalogProvider.""" - ctx = SessionContext() - - catalog_provider = MyCatalogProvider() - ctx.register_catalog_provider("ffi_catalog", catalog_provider) - - catalog = ctx.catalog("ffi_catalog") - - # Register a new memory schema - new_schema = Schema.memory_schema() - catalog.register_schema("additional_schema", new_schema) - - # Verify the schema was registered - assert "additional_schema" in catalog.names() - - # Add a table to the new schema - new_schema.register_table("new_table", create_test_dataset()) - - # Query the new table - result = ctx.sql("SELECT * FROM ffi_catalog.additional_schema.new_table").collect() - assert len(result) == 1 - assert result[0].column(0) == pa.array([100, 200, 300]) - - -def test_ffi_catalog_provider_deregister_schema(): - """Test deregistering schemas from FFI CatalogProvider.""" - ctx = SessionContext() - - catalog_provider = MyCatalogProvider() - ctx.register_catalog_provider("ffi_catalog", catalog_provider) - - catalog = ctx.catalog("ffi_catalog") - - # Register two schemas - schema1 = Schema.memory_schema() - schema2 = Schema.memory_schema() - catalog.register_schema("temp_schema1", schema1) - catalog.register_schema("temp_schema2", schema2) - - # Verify both exist - names = catalog.names() - assert "temp_schema1" in names - 
assert "temp_schema2" in names - - # Deregister one schema - catalog.deregister_schema("temp_schema1") - - # Verify it's gone - names = catalog.names() - assert "temp_schema1" not in names - assert "temp_schema2" in names diff --git a/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py b/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py deleted file mode 100644 index 0c949c34a..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py +++ /dev/null @@ -1,70 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import annotations - -import pyarrow as pa -from datafusion import SessionContext, col, udf -from datafusion_ffi_example import IsNullUDF - - -def setup_context_with_table(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3, None])], - names=["a"], - ) - ctx.register_record_batches("test_table", [[batch]]) - return ctx - - -def test_ffi_scalar_register(): - ctx = setup_context_with_table() - my_udf = udf(IsNullUDF()) - ctx.register_udf(my_udf) - - result = ctx.sql("select my_custom_is_null(a) from test_table").collect() - - assert len(result) == 1 - assert result[0].num_columns == 1 - print(result) - - result = [r.column(0) for r in result] - expected = [ - pa.array([False, False, False, True], type=pa.bool_()), - ] - - assert result == expected - - -def test_ffi_scalar_call_directly(): - ctx = setup_context_with_table() - my_udf = udf(IsNullUDF()) - - result = ctx.table("test_table").select(my_udf(col("a"))).collect() - - assert len(result) == 1 - assert result[0].num_columns == 1 - print(result) - - result = [r.column(0) for r in result] - expected = [ - pa.array([False, False, False, True], type=pa.bool_()), - ] - - assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py b/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py deleted file mode 100644 index 93449c660..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py +++ /dev/null @@ -1,232 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import pyarrow as pa -import pyarrow.dataset as ds -import pytest -from datafusion import SessionContext, Table -from datafusion.catalog import Schema -from datafusion_ffi_example import FixedSchemaProvider, MyCatalogProvider - - -def create_test_dataset() -> Table: - """Create a simple test dataset.""" - batch = pa.RecordBatch.from_arrays( - [pa.array([100, 200, 300]), pa.array([1.1, 2.2, 3.3])], - names=["id", "value"], - ) - dataset = ds.dataset([batch]) - return Table(dataset) - - -@pytest.mark.parametrize("inner_capsule", [True, False]) -def test_schema_provider_extract_values(inner_capsule: bool) -> None: - ctx = SessionContext() - - my_schema_name = "my_schema" - - schema_provider = FixedSchemaProvider() - if inner_capsule: - schema_provider = schema_provider.__datafusion_schema_provider__(ctx) - - ctx.catalog().register_schema(my_schema_name, schema_provider) - - expected_schema_name = "my_schema" - expected_table_name = "my_table" - expected_table_columns = ["units", "price"] - - default_catalog = ctx.catalog() - - catalog_schemas = default_catalog.names() - assert expected_schema_name in catalog_schemas - my_schema = default_catalog.schema(expected_schema_name) - assert expected_table_name in my_schema.names() - my_table = my_schema.table(expected_table_name) - assert expected_table_columns == my_table.schema.names - - result = ctx.table(f"{expected_schema_name}.{expected_table_name}").collect() - assert len(result) == 2 - - col0_result = [r.column(0) for r in result] - col1_result = [r.column(1) for r in 
result] - expected_col0 = [ - pa.array([10, 20, 30], type=pa.int32()), - pa.array([5, 7], type=pa.int32()), - ] - expected_col1 = [ - pa.array([1, 2, 5], type=pa.float64()), - pa.array([1.5, 2.5], type=pa.float64()), - ] - assert col0_result == expected_col0 - assert col1_result == expected_col1 - - -def test_ffi_schema_provider_basic(): - """Test basic FFI SchemaProvider functionality.""" - ctx = SessionContext() - - # Register FFI schema - schema_provider = FixedSchemaProvider() - ctx.catalog().register_schema("ffi_schema", schema_provider) - - # Verify the schema exists - schema = ctx.catalog().schema("ffi_schema") - table_names = schema.names() - assert "my_table" in table_names - - # Query the pre-populated table - result = ctx.sql("SELECT * FROM ffi_schema.my_table").collect() - assert len(result) == 2 - assert result[0].num_columns == 2 - - -def test_ffi_schema_provider_register_table(): - """Test registering additional tables to FFI SchemaProvider.""" - ctx = SessionContext() - - schema_provider = FixedSchemaProvider() - ctx.catalog().register_schema("ffi_schema", schema_provider) - - schema = ctx.catalog().schema("ffi_schema") - - # Register a new table - schema.register_table("additional_table", create_test_dataset()) - - # Verify the table was registered - assert "additional_table" in schema.names() - - # Query the new table - result = ctx.sql("SELECT * FROM ffi_schema.additional_table").collect() - assert len(result) == 1 - assert result[0].column(0) == pa.array([100, 200, 300]) - assert result[0].column(1) == pa.array([1.1, 2.2, 3.3]) - - -def test_ffi_schema_provider_deregister_table(): - """Test deregistering tables from FFI SchemaProvider.""" - ctx = SessionContext() - - schema_provider = FixedSchemaProvider() - ctx.catalog().register_schema("ffi_schema", schema_provider) - - schema = ctx.catalog().schema("ffi_schema") - - # Register two tables - schema.register_table("temp_table1", create_test_dataset()) - schema.register_table("temp_table2", 
create_test_dataset()) - - # Verify both exist - names = schema.names() - assert "temp_table1" in names - assert "temp_table2" in names - - # Deregister one table - schema.deregister_table("temp_table1") - - # Verify it's gone - names = schema.names() - assert "temp_table1" not in names - assert "temp_table2" in names - - -def test_mixed_ffi_and_python_providers(): - """Test mixing FFI and Python providers in the same catalog/schema.""" - ctx = SessionContext() - - # Register FFI catalog - ffi_catalog = MyCatalogProvider() - ctx.register_catalog_provider("ffi_catalog", ffi_catalog) - - # Register Python memory schema to FFI catalog - python_schema = Schema.memory_schema() - ctx.catalog("ffi_catalog").register_schema("python_schema", python_schema) - - # Add table to Python schema - python_schema.register_table("python_table", create_test_dataset()) - - # Query both FFI table and Python table - result_ffi = ctx.sql("SELECT * FROM ffi_catalog.my_schema.my_table").collect() - assert len(result_ffi) == 2 - - result_python = ctx.sql( - "SELECT * FROM ffi_catalog.python_schema.python_table" - ).collect() - assert len(result_python) == 1 - assert result_python[0].column(0) == pa.array([100, 200, 300]) - - -def test_ffi_catalog_with_multiple_schemas(): - """Test FFI catalog with multiple schemas of different types.""" - ctx = SessionContext() - - catalog_provider = MyCatalogProvider() - ctx.register_catalog_provider("multi_catalog", catalog_provider) - - catalog = ctx.catalog("multi_catalog") - - # Register different types of schemas - ffi_schema = FixedSchemaProvider() - memory_schema = Schema.memory_schema() - - catalog.register_schema("ffi_schema", ffi_schema) - catalog.register_schema("memory_schema", memory_schema) - - # Add tables to memory schema - memory_schema.register_table("mem_table", create_test_dataset()) - - # Verify all schemas exist - names = catalog.names() - assert "my_schema" in names # Pre-populated - assert "ffi_schema" in names - assert 
"memory_schema" in names - - # Query tables from each schema - result = ctx.sql("SELECT * FROM multi_catalog.my_schema.my_table").collect() - assert len(result) == 2 - - result = ctx.sql("SELECT * FROM multi_catalog.ffi_schema.my_table").collect() - assert len(result) == 2 - - result = ctx.sql("SELECT * FROM multi_catalog.memory_schema.mem_table").collect() - assert len(result) == 1 - assert result[0].column(0) == pa.array([100, 200, 300]) - - -def test_ffi_schema_table_exist(): - """Test table_exist method on FFI SchemaProvider.""" - ctx = SessionContext() - - schema_provider = FixedSchemaProvider() - ctx.catalog().register_schema("ffi_schema", schema_provider) - - schema = ctx.catalog().schema("ffi_schema") - - # Check pre-populated table - assert schema.table_exist("my_table") - - # Check non-existent table - assert not schema.table_exist("nonexistent_table") - - # Register a new table and check - schema.register_table("new_table", create_test_dataset()) - assert schema.table_exist("new_table") - - # Deregister and check - schema.deregister_table("new_table") - assert not schema.table_exist("new_table") diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_function.py b/examples/datafusion-ffi-example/python/tests/_test_table_function.py deleted file mode 100644 index bf5aae3bd..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_table_function.py +++ /dev/null @@ -1,135 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pyarrow as pa -from datafusion import Expr, SessionContext, udtf -from datafusion_ffi_example import MyTableFunction, MyTableProvider - -if TYPE_CHECKING: - from datafusion.context import TableProviderExportable - - -def test_ffi_table_function_register() -> None: - ctx = SessionContext() - table_func = MyTableFunction() - - table_udtf = udtf(table_func, "my_table_func") - ctx.register_udtf(table_udtf) - result = ctx.sql("select * from my_table_func()").collect() - - assert len(result) == 2 - assert result[0].num_columns == 4 - print(result) - - result = [r.column(0) for r in result] - expected = [ - pa.array([0, 1, 2], type=pa.int32()), - pa.array([3, 4, 5, 6], type=pa.int32()), - ] - - assert result == expected - - -def test_ffi_table_function_call_directly(): - ctx = SessionContext() - table_func = MyTableFunction() - table_udtf = udtf(table_func, "my_table_func") - - my_table = table_udtf() - ctx.register_table("t", my_table) - result = ctx.table("t").collect() - - assert len(result) == 2 - assert result[0].num_columns == 4 - print(result) - - result = [r.column(0) for r in result] - expected = [ - pa.array([0, 1, 2], type=pa.int32()), - pa.array([3, 4, 5, 6], type=pa.int32()), - ] - - assert result == expected - - -class PythonTableFunction: - """Python based table function. - - This class is used as a Python implementation of a table function. 
- We use the existing TableProvider to create the underlying - provider, and this function takes no arguments - """ - - def __call__( - self, num_cols: Expr, num_rows: Expr, num_batches: Expr - ) -> TableProviderExportable: - args = [ - num_cols.to_variant().value_i64(), - num_rows.to_variant().value_i64(), - num_batches.to_variant().value_i64(), - ] - return MyTableProvider(*args) - - -def common_table_function_test(test_ctx: SessionContext) -> None: - result = test_ctx.sql("select * from my_table_func(3,2,4)").collect() - - assert len(result) == 4 - assert result[0].num_columns == 3 - print(result) - - result = [r.column(0) for r in result] - expected = [ - pa.array([0, 1], type=pa.int32()), - pa.array([2, 3, 4], type=pa.int32()), - pa.array([4, 5, 6, 7], type=pa.int32()), - pa.array([6, 7, 8, 9, 10], type=pa.int32()), - ] - - assert result == expected - - -def test_python_table_function(): - ctx = SessionContext() - table_func = PythonTableFunction() - table_udtf = udtf(table_func, "my_table_func") - ctx.register_udtf(table_udtf) - - common_table_function_test(ctx) - - -def test_python_table_function_decorator(): - ctx = SessionContext() - - @udtf("my_table_func") - def my_udtf( - num_cols: Expr, num_rows: Expr, num_batches: Expr - ) -> TableProviderExportable: - args = [ - num_cols.to_variant().value_i64(), - num_rows.to_variant().value_i64(), - num_batches.to_variant().value_i64(), - ] - return MyTableProvider(*args) - - ctx.register_udtf(my_udtf) - - common_table_function_test(ctx) diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py deleted file mode 100644 index fc77d2d3b..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import pyarrow as pa -import pytest -from datafusion import SessionContext -from datafusion_ffi_example import MyTableProvider - - -@pytest.mark.parametrize("inner_capsule", [True, False]) -def test_table_provider_ffi(inner_capsule: bool) -> None: - ctx = SessionContext() - table = MyTableProvider(3, 2, 4) - if inner_capsule: - table = table.__datafusion_table_provider__(ctx) - - ctx.register_table("t", table) - result = ctx.table("t").collect() - - assert len(result) == 4 - assert result[0].num_columns == 3 - - result = [r.column(0) for r in result] - expected = [ - pa.array([0, 1], type=pa.int32()), - pa.array([2, 3, 4], type=pa.int32()), - pa.array([4, 5, 6, 7], type=pa.int32()), - pa.array([6, 7, 8, 9, 10], type=pa.int32()), - ] - - assert result == expected - - result = ctx.read_table(table).collect() - result = [r.column(0) for r in result] - assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_window_udf.py b/examples/datafusion-ffi-example/python/tests/_test_window_udf.py deleted file mode 100644 index 7d96994b9..000000000 --- a/examples/datafusion-ffi-example/python/tests/_test_window_udf.py +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more 
contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import pyarrow as pa -from datafusion import SessionContext, col, udwf -from datafusion_ffi_example import MyRankUDF - - -def setup_context_with_table(): - ctx = SessionContext() - - # Pick numbers here so we get the same value in both groups - # since we cannot be certain of the output order of batches - batch = pa.RecordBatch.from_arrays( - [ - pa.array([40, 10, 30, 20], type=pa.int64()), - ], - names=["a"], - ) - ctx.register_record_batches("test_table", [[batch]]) - return ctx - - -def test_ffi_window_register(): - ctx = setup_context_with_table() - my_udwf = udwf(MyRankUDF()) - ctx.register_udwf(my_udwf) - - result = ctx.sql( - "select a, my_custom_rank() over (order by a) from test_table" - ).collect() - assert len(result) == 1 - assert result[0].num_columns == 2 - - results = [ - (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) - ] - results.sort() - - expected = [ - (10, 1), - (20, 2), - (30, 3), - (40, 4), - ] - assert results == expected - - -def test_ffi_window_call_directly(): - ctx = setup_context_with_table() - my_udwf = udwf(MyRankUDF()) - - result = ( - ctx.table("test_table") - .select(col("a"), my_udwf().order_by(col("a")).build()) - .collect() - ) - - assert len(result) == 
1 - assert result[0].num_columns == 2 - - results = [ - (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) - ] - results.sort() - - expected = [ - (10, 1), - (20, 2), - (30, 3), - (40, 4), - ] - assert results == expected diff --git a/examples/datafusion-ffi-example/python/tests/conftest.py b/examples/datafusion-ffi-example/python/tests/conftest.py deleted file mode 100644 index 68f8057af..000000000 --- a/examples/datafusion-ffi-example/python/tests/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING - -import pytest - -if TYPE_CHECKING: - from collections.abc import Generator - from typing import Any - - -class _FailOnWarning(logging.Handler): - def emit(self, record: logging.LogRecord) -> None: - if record.levelno >= logging.WARNING: - err = f"Unexpected log warning from '{record.name}': {self.format(record)}" - raise AssertionError(err) - - -@pytest.fixture(autouse=True) -def fail_on_log_warnings() -> Generator[None, Any, None]: - handler = _FailOnWarning() - logging.root.addHandler(handler) - yield - logging.root.removeHandler(handler) diff --git a/examples/datafusion-ffi-example/src/aggregate_udf.rs b/examples/datafusion-ffi-example/src/aggregate_udf.rs deleted file mode 100644 index d5343ff91..000000000 --- a/examples/datafusion-ffi-example/src/aggregate_udf.rs +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::sync::Arc; - -use arrow_schema::DataType; -use datafusion_common::error::Result as DataFusionResult; -use datafusion_expr::function::AccumulatorArgs; -use datafusion_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; -use datafusion_ffi::udaf::FFI_AggregateUDF; -use datafusion_functions_aggregate::sum::Sum; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyResult, Python, pyclass, pymethods}; - -#[pyclass( - from_py_object, - name = "MySumUDF", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub(crate) struct MySumUDF { - inner: Arc, -} - -#[pymethods] -impl MySumUDF { - #[new] - fn new() -> PyResult { - Ok(Self { - inner: Arc::new(Sum::new()), - }) - } - - fn __datafusion_aggregate_udf__<'py>( - &self, - py: Python<'py>, - ) -> PyResult> { - let name = cr"datafusion_aggregate_udf".into(); - - let func = Arc::new(AggregateUDF::from(self.clone())); - let provider = FFI_AggregateUDF::from(func); - - PyCapsule::new(py, provider, Some(name)) - } -} - -impl AggregateUDFImpl for MySumUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "my_custom_sum" - } - - fn signature(&self) -> &Signature { - self.inner.signature() - } - - fn return_type(&self, arg_types: &[DataType]) -> DataFusionResult { - self.inner.return_type(arg_types) - } - - fn accumulator(&self, acc_args: AccumulatorArgs) -> DataFusionResult> { - self.inner.accumulator(acc_args) - } - - fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { - self.inner.coerce_types(arg_types) - } -} diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs deleted file mode 100644 index d0e07c787..000000000 --- a/examples/datafusion-ffi-example/src/catalog_provider.rs +++ /dev/null @@ -1,273 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::any::Any; -use std::fmt::Debug; -use std::sync::Arc; - -use arrow::datatypes::Schema; -use async_trait::async_trait; -use datafusion_catalog::{ - CatalogProvider, CatalogProviderList, MemTable, MemoryCatalogProvider, - MemoryCatalogProviderList, MemorySchemaProvider, SchemaProvider, TableProvider, -}; -use datafusion_common::error::{DataFusionError, Result}; -use datafusion_ffi::catalog_provider::FFI_CatalogProvider; -use datafusion_ffi::catalog_provider_list::FFI_CatalogProviderList; -use datafusion_ffi::schema_provider::FFI_SchemaProvider; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods}; - -use crate::utils::ffi_logical_codec_from_pycapsule; - -pub fn my_table() -> Arc { - use arrow::datatypes::{DataType, Field}; - use datafusion_common::record_batch; - - let schema = Arc::new(Schema::new(vec![ - Field::new("units", DataType::Int32, true), - Field::new("price", DataType::Float64, true), - ])); - - let partitions = vec![ - record_batch!( - ("units", Int32, vec![10, 20, 30]), - ("price", Float64, vec![1.0, 2.0, 5.0]) - ) - .unwrap(), - record_batch!( - ("units", Int32, vec![5, 7]), - ("price", Float64, vec![1.5, 2.5]) - ) - .unwrap(), - ]; - - Arc::new(MemTable::try_new(schema, vec![partitions]).unwrap()) -} - 
-#[pyclass( - skip_from_py_object, - name = "FixedSchemaProvider", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug)] -pub struct FixedSchemaProvider { - inner: Arc, -} - -impl Default for FixedSchemaProvider { - fn default() -> Self { - let inner = Arc::new(MemorySchemaProvider::new()); - - let table = my_table(); - - let _ = inner.register_table("my_table".to_string(), table).unwrap(); - - Self { inner } - } -} - -#[pymethods] -impl FixedSchemaProvider { - #[new] - pub fn new() -> Self { - Self::default() - } - - pub fn __datafusion_schema_provider__<'py>( - &self, - py: Python<'py>, - session: Bound, - ) -> PyResult> { - let name = cr"datafusion_schema_provider".into(); - - let provider = Arc::clone(&self.inner) as Arc; - - let codec = ffi_logical_codec_from_pycapsule(session)?; - let provider = FFI_SchemaProvider::new_with_ffi_codec(provider, None, codec); - - PyCapsule::new(py, provider, Some(name)) - } -} - -#[async_trait] -impl SchemaProvider for FixedSchemaProvider { - fn as_any(&self) -> &dyn Any { - self - } - - fn table_names(&self) -> Vec { - self.inner.table_names() - } - - async fn table(&self, name: &str) -> Result>, DataFusionError> { - self.inner.table(name).await - } - - fn register_table( - &self, - name: String, - table: Arc, - ) -> Result>> { - self.inner.register_table(name, table) - } - - fn deregister_table(&self, name: &str) -> Result>> { - self.inner.deregister_table(name) - } - - fn table_exist(&self, name: &str) -> bool { - self.inner.table_exist(name) - } -} - -/// This catalog provider is intended only for unit tests. It prepopulates with one -/// schema and only allows for schemas named after four types of fruit. 
-#[pyclass( - skip_from_py_object, - name = "MyCatalogProvider", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone)] -pub(crate) struct MyCatalogProvider { - inner: Arc, -} - -impl CatalogProvider for MyCatalogProvider { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema_names(&self) -> Vec { - self.inner.schema_names() - } - - fn schema(&self, name: &str) -> Option> { - self.inner.schema(name) - } - - fn register_schema( - &self, - name: &str, - schema: Arc, - ) -> Result>> { - self.inner.register_schema(name, schema) - } - - fn deregister_schema( - &self, - name: &str, - cascade: bool, - ) -> Result>> { - self.inner.deregister_schema(name, cascade) - } -} - -#[pymethods] -impl MyCatalogProvider { - #[new] - pub fn new() -> PyResult { - let inner = Arc::new(MemoryCatalogProvider::new()); - - let schema_name: &str = "my_schema"; - let _ = inner.register_schema(schema_name, Arc::new(FixedSchemaProvider::default())); - - Ok(Self { inner }) - } - - pub fn __datafusion_catalog_provider__<'py>( - &self, - py: Python<'py>, - session: Bound, - ) -> PyResult> { - let name = cr"datafusion_catalog_provider".into(); - - let provider = Arc::clone(&self.inner) as Arc; - - let codec = ffi_logical_codec_from_pycapsule(session)?; - let provider = FFI_CatalogProvider::new_with_ffi_codec(provider, None, codec); - - PyCapsule::new(py, provider, Some(name)) - } -} - -/// This catalog provider list is intended only for unit tests. -/// It pre-populates with a single catalog. 
-#[pyclass( - skip_from_py_object, - name = "MyCatalogProviderList", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone)] -pub(crate) struct MyCatalogProviderList { - inner: Arc, -} - -impl CatalogProviderList for MyCatalogProviderList { - fn as_any(&self) -> &dyn Any { - self - } - - fn catalog_names(&self) -> Vec { - self.inner.catalog_names() - } - - fn catalog(&self, name: &str) -> Option> { - self.inner.catalog(name) - } - - fn register_catalog( - &self, - name: String, - catalog: Arc, - ) -> Option> { - self.inner.register_catalog(name, catalog) - } -} - -#[pymethods] -impl MyCatalogProviderList { - #[new] - pub fn new() -> PyResult { - let inner = Arc::new(MemoryCatalogProviderList::new()); - - inner.register_catalog( - "auto_ffi_catalog".to_owned(), - Arc::new(MyCatalogProvider::new()?), - ); - - Ok(Self { inner }) - } - - pub fn __datafusion_catalog_provider_list__<'py>( - &self, - py: Python<'py>, - session: Bound, - ) -> PyResult> { - let name = cr"datafusion_catalog_provider_list".into(); - - let provider = Arc::clone(&self.inner) as Arc; - - let codec = ffi_logical_codec_from_pycapsule(session)?; - let provider = FFI_CatalogProviderList::new_with_ffi_codec(provider, None, codec); - - PyCapsule::new(py, provider, Some(name)) - } -} diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs deleted file mode 100644 index 23f2001a2..000000000 --- a/examples/datafusion-ffi-example/src/lib.rs +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use pyo3::prelude::*; - -use crate::aggregate_udf::MySumUDF; -use crate::catalog_provider::{FixedSchemaProvider, MyCatalogProvider, MyCatalogProviderList}; -use crate::scalar_udf::IsNullUDF; -use crate::table_function::MyTableFunction; -use crate::table_provider::MyTableProvider; -use crate::window_udf::MyRankUDF; - -pub(crate) mod aggregate_udf; -pub(crate) mod catalog_provider; -pub(crate) mod scalar_udf; -pub(crate) mod table_function; -pub(crate) mod table_provider; -pub(crate) mod utils; -pub(crate) mod window_udf; - -#[pymodule] -fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { - pyo3_log::init(); - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} diff --git a/examples/datafusion-ffi-example/src/scalar_udf.rs b/examples/datafusion-ffi-example/src/scalar_udf.rs deleted file mode 100644 index 374924781..000000000 --- a/examples/datafusion-ffi-example/src/scalar_udf.rs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::any::Any; -use std::sync::Arc; - -use arrow_array::{Array, BooleanArray}; -use arrow_schema::DataType; -use datafusion_common::ScalarValue; -use datafusion_common::error::Result as DataFusionResult; -use datafusion_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, - Volatility, -}; -use datafusion_ffi::udf::FFI_ScalarUDF; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyResult, Python, pyclass, pymethods}; - -#[pyclass( - from_py_object, - name = "IsNullUDF", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct IsNullUDF { - signature: Signature, -} - -#[pymethods] -impl IsNullUDF { - #[new] - fn new() -> Self { - Self { - signature: Signature::new(TypeSignature::Any(1), Volatility::Immutable), - } - } - - fn __datafusion_scalar_udf__<'py>(&self, py: Python<'py>) -> PyResult> { - let name = cr"datafusion_scalar_udf".into(); - - let func = Arc::new(ScalarUDF::from(self.clone())); - let provider = FFI_ScalarUDF::from(func); - - PyCapsule::new(py, provider, Some(name)) - } -} - -impl ScalarUDFImpl for IsNullUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "my_custom_is_null" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult { - Ok(DataType::Boolean) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> DataFusionResult { - let input = &args.args[0]; - - Ok(match input { - ColumnarValue::Array(arr) => 
match arr.is_nullable() { - true => { - let nulls = arr.nulls().unwrap(); - let nulls = BooleanArray::from_iter(nulls.iter().map(|x| Some(!x))); - ColumnarValue::Array(Arc::new(nulls)) - } - false => ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))), - }, - ColumnarValue::Scalar(sv) => { - ColumnarValue::Scalar(ScalarValue::Boolean(Some(sv == &ScalarValue::Null))) - } - }) - } -} diff --git a/examples/datafusion-ffi-example/src/table_function.rs b/examples/datafusion-ffi-example/src/table_function.rs deleted file mode 100644 index 0914e161c..000000000 --- a/examples/datafusion-ffi-example/src/table_function.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion_catalog::{TableFunctionImpl, TableProvider}; -use datafusion_common::error::Result as DataFusionResult; -use datafusion_expr::Expr; -use datafusion_ffi::udtf::FFI_TableFunction; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods}; - -use crate::table_provider::MyTableProvider; -use crate::utils::ffi_logical_codec_from_pycapsule; - -#[pyclass( - from_py_object, - name = "MyTableFunction", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone)] -pub(crate) struct MyTableFunction {} - -#[pymethods] -impl MyTableFunction { - #[new] - fn new() -> Self { - Self {} - } - - fn __datafusion_table_function__<'py>( - &self, - py: Python<'py>, - session: Bound, - ) -> PyResult> { - let name = cr"datafusion_table_function".into(); - - let func = self.clone(); - let codec = ffi_logical_codec_from_pycapsule(session)?; - let provider = FFI_TableFunction::new_with_ffi_codec(Arc::new(func), None, codec); - - PyCapsule::new(py, provider, Some(name)) - } -} - -impl TableFunctionImpl for MyTableFunction { - fn call(&self, _args: &[Expr]) -> DataFusionResult> { - let provider = MyTableProvider::new(4, 3, 2).create_table()?; - Ok(Arc::new(provider)) - } -} diff --git a/examples/datafusion-ffi-example/src/table_provider.rs b/examples/datafusion-ffi-example/src/table_provider.rs deleted file mode 100644 index 2c79e6ef9..000000000 --- a/examples/datafusion-ffi-example/src/table_provider.rs +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use arrow_array::{ArrayRef, RecordBatch}; -use arrow_schema::{DataType, Field, Schema}; -use datafusion_catalog::MemTable; -use datafusion_common::error::{DataFusionError, Result as DataFusionResult}; -use datafusion_ffi::table_provider::FFI_TableProvider; -use pyo3::exceptions::PyRuntimeError; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods}; - -use crate::utils::ffi_logical_codec_from_pycapsule; - -/// In order to provide a test that demonstrates different sized record batches, -/// the first batch will have num_rows, the second batch num_rows+1, and so on. 
-#[pyclass( - from_py_object, - name = "MyTableProvider", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Clone)] -pub(crate) struct MyTableProvider { - num_cols: usize, - num_rows: usize, - num_batches: usize, -} - -fn create_record_batch( - schema: &Arc, - num_cols: usize, - start_value: i32, - num_values: usize, -) -> DataFusionResult { - let end_value = start_value + num_values as i32; - let row_values: Vec = (start_value..end_value).collect(); - - let columns: Vec<_> = (0..num_cols) - .map(|_| Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef) - .collect(); - - RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from) -} - -impl MyTableProvider { - pub fn create_table(&self) -> DataFusionResult { - let fields: Vec<_> = (0..self.num_cols) - .map(|idx| (b'A' + idx as u8) as char) - .map(|col_name| Field::new(col_name, DataType::Int32, true)) - .collect(); - - let schema = Arc::new(Schema::new(fields)); - - let batches: DataFusionResult> = (0..self.num_batches) - .map(|batch_idx| { - let start_value = batch_idx * self.num_rows; - create_record_batch( - &schema, - self.num_cols, - start_value as i32, - self.num_rows + batch_idx, - ) - }) - .collect(); - - MemTable::try_new(schema, vec![batches?]) - } -} - -#[pymethods] -impl MyTableProvider { - #[new] - pub fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self { - Self { - num_cols, - num_rows, - num_batches, - } - } - - pub fn __datafusion_table_provider__<'py>( - &self, - py: Python<'py>, - session: Bound, - ) -> PyResult> { - let name = cr"datafusion_table_provider".into(); - - let provider = self - .create_table() - .map_err(|e: DataFusionError| PyRuntimeError::new_err(e.to_string()))?; - - let codec = ffi_logical_codec_from_pycapsule(session)?; - let provider = - FFI_TableProvider::new_with_ffi_codec(Arc::new(provider), false, None, codec); - - PyCapsule::new(py, provider, Some(name)) - } -} diff --git 
a/examples/datafusion-ffi-example/src/utils.rs b/examples/datafusion-ffi-example/src/utils.rs deleted file mode 100644 index 5f2865aa2..000000000 --- a/examples/datafusion-ffi-example/src/utils.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::ptr::NonNull; - -use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; -use pyo3::exceptions::PyValueError; -use pyo3::ffi::c_str; -use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods}; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyAny, PyResult}; - -pub(crate) fn ffi_logical_codec_from_pycapsule( - obj: Bound, -) -> PyResult { - let attr_name = "__datafusion_logical_extension_codec__"; - let capsule = if obj.hasattr(attr_name)? { - obj.getattr(attr_name)?.call0()? - } else { - obj - }; - - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_logical_extension_codec")))? 
- .cast(); - let codec = unsafe { data.as_ref() }; - - Ok(codec.clone()) -} - -pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { - let capsule_name = capsule.name()?; - if capsule_name.is_none() { - return Err(PyValueError::new_err(format!( - "Expected {name} PyCapsule to have name set." - ))); - } - - let capsule_name = unsafe { capsule_name.unwrap().as_cstr().to_str()? }; - if capsule_name != name { - return Err(PyValueError::new_err(format!( - "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" - ))); - } - - Ok(()) -} diff --git a/examples/datafusion-ffi-example/src/window_udf.rs b/examples/datafusion-ffi-example/src/window_udf.rs deleted file mode 100644 index cbf179a86..000000000 --- a/examples/datafusion-ffi-example/src/window_udf.rs +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::sync::Arc; - -use arrow_schema::{DataType, FieldRef}; -use datafusion_common::error::Result as DataFusionResult; -use datafusion_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; -use datafusion_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl}; -use datafusion_ffi::udwf::FFI_WindowUDF; -use datafusion_functions_window::rank::rank_udwf; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyResult, Python, pyclass, pymethods}; - -#[pyclass( - from_py_object, - name = "MyRankUDF", - module = "datafusion_ffi_example", - subclass -)] -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub(crate) struct MyRankUDF { - inner: Arc, -} - -#[pymethods] -impl MyRankUDF { - #[new] - fn new() -> PyResult { - Ok(Self { inner: rank_udwf() }) - } - - fn __datafusion_window_udf__<'py>(&self, py: Python<'py>) -> PyResult> { - let name = cr"datafusion_window_udf".into(); - - let func = Arc::new(WindowUDF::from(self.clone())); - let provider = FFI_WindowUDF::from(func); - - PyCapsule::new(py, provider, Some(name)) - } -} - -impl WindowUDFImpl for MyRankUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "my_custom_rank" - } - - fn signature(&self) -> &Signature { - self.inner.signature() - } - - fn partition_evaluator( - &self, - partition_evaluator_args: PartitionEvaluatorArgs, - ) -> DataFusionResult> { - self.inner - .inner() - .partition_evaluator(partition_evaluator_args) - } - - fn field(&self, field_args: WindowUDFFieldArgs) -> DataFusionResult { - self.inner.inner().field(field_args) - } - - fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { - self.inner.coerce_types(arg_types) - } -} diff --git a/examples/export.py b/examples/export.py deleted file mode 100644 index c7a387bcb..000000000 --- a/examples/export.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datafusion - -# create a context -ctx = datafusion.SessionContext() - -# create a new datafusion DataFrame -df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) -# Dataframe: -# +---+---+ -# | a | b | -# +---+---+ -# | 1 | 4 | -# | 2 | 5 | -# | 3 | 6 | -# +---+---+ - -# export to pandas dataframe -pandas_df = df.to_pandas() -assert pandas_df.shape == (3, 2) - -# export to PyArrow table -arrow_table = df.to_arrow_table() -assert arrow_table.shape == (3, 2) - -# export to Polars dataframe -polars_df = df.to_polars() -assert polars_df.shape == (3, 2) - -# export to Python list of rows -pylist = df.to_pylist() -assert pylist == [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}] - -# export to Python dictionary of columns -pydict = df.to_pydict() -assert pydict == {"a": [1, 2, 3], "b": [4, 5, 6]} diff --git a/examples/import.py b/examples/import.py deleted file mode 100644 index 7b5ab5082..000000000 --- a/examples/import.py +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datafusion -import pandas as pd -import polars as pl -import pyarrow as pa - -# Create a context -ctx = datafusion.SessionContext() - -# Create a datafusion DataFrame from a Python dictionary -# The dictionary keys represent column names and the dictionary values -# represent column values -df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) -assert type(df) is datafusion.DataFrame -# Dataframe: -# +---+---+ -# | a | b | -# +---+---+ -# | 1 | 4 | -# | 2 | 5 | -# | 3 | 6 | -# +---+---+ - -# Create a datafusion DataFrame from a Python list of rows -df = ctx.from_pylist([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]) -assert type(df) is datafusion.DataFrame - -# Convert pandas DataFrame to datafusion DataFrame -pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) -df = ctx.from_pandas(pandas_df) -assert type(df) is datafusion.DataFrame - -# Convert polars DataFrame to datafusion DataFrame -polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) -df = ctx.from_polars(polars_df) -assert type(df) is datafusion.DataFrame - -# Convert Arrow Table to datafusion DataFrame -arrow_table = pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) -df = ctx.from_arrow(arrow_table) -assert type(df) is datafusion.DataFrame diff --git a/examples/python-udaf.py b/examples/python-udaf.py deleted file mode 100644 index 6655edb0a..000000000 --- a/examples/python-udaf.py +++ /dev/null @@ 
-1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datafusion -import pyarrow as pa -import pyarrow.compute -from datafusion import Accumulator, col, udaf - - -class MyAccumulator(Accumulator): - """ - Interface of a user-defined accumulation. - """ - - def __init__(self) -> None: - self._sum = pa.scalar(0.0) - - def update(self, values: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py()) - - def merge(self, states: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states).as_py()) - - def state(self) -> pa.Array: - return pa.array([self._sum.as_py()]) - - def evaluate(self) -> pa.Scalar: - return self._sum - - -# create a context -ctx = datafusion.SessionContext() - -# create a RecordBatch and a new DataFrame from it -batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], -) -df = ctx.create_dataframe([[batch]]) - -my_udaf = udaf( - MyAccumulator, - pa.float64(), - pa.float64(), - [pa.float64()], - "stable", -) - -df = df.aggregate([], [my_udaf(col("a"))]) - -result = df.collect()[0] - -assert result.column(0) == pa.array([6.0]) diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py deleted file mode 100644 index b870645a3..000000000 --- a/examples/python-udf-comparisons.py +++ /dev/null @@ -1,185 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import time -from pathlib import Path - -import pyarrow as pa -import pyarrow.compute as pc -from datafusion import SessionContext, col, lit, udf -from datafusion import functions as F - -path = Path(__file__).parent.resolve() -filepath = path / "./tpch/data/lineitem.parquet" - -# This example serves to demonstrate alternate approaches to answering the -# question "return all of the rows that have a specific combination of these -# values". We have the combinations we care about provided as a python -# list of tuples. There is no built in function that supports this operation, -# but it can be explicitly specified via a single expression or we can -# use a user defined function. - -ctx = SessionContext() - -# These part keys and suppliers are chosen because there are -# cases where two suppliers each have two of the part keys -# but we are interested in these specific combinations. - -values_of_interest = [ - (1530, 4031, "N"), - (6530, 1531, "N"), - (5618, 619, "N"), - (8118, 8119, "N"), -] - -partkeys = [lit(r[0]) for r in values_of_interest] -suppkeys = [lit(r[1]) for r in values_of_interest] -returnflags = [lit(r[2]) for r in values_of_interest] - -df_lineitem = ctx.read_parquet(filepath).select( - "l_partkey", "l_suppkey", "l_returnflag" -) - -start_time = time.time() - -df_simple_filter = df_lineitem.filter( - F.in_list(col("l_partkey"), partkeys), - F.in_list(col("l_suppkey"), suppkeys), - F.in_list(col("l_returnflag"), returnflags), -) - -num_rows = df_simple_filter.count() -print( - f"Simple filtering has number {num_rows} rows and took {time.time() - start_time} s" -) -print("This is the incorrect number of rows!") -start_time = time.time() - -# Explicitly check for the combinations of interest. -# This works but is not scalable. 
- -filter_expr = ( - ( - (col("l_partkey") == values_of_interest[0][0]) - & (col("l_suppkey") == values_of_interest[0][1]) - & (col("l_returnflag") == values_of_interest[0][2]) - ) - | ( - (col("l_partkey") == values_of_interest[1][0]) - & (col("l_suppkey") == values_of_interest[1][1]) - & (col("l_returnflag") == values_of_interest[1][2]) - ) - | ( - (col("l_partkey") == values_of_interest[2][0]) - & (col("l_suppkey") == values_of_interest[2][1]) - & (col("l_returnflag") == values_of_interest[2][2]) - ) - | ( - (col("l_partkey") == values_of_interest[3][0]) - & (col("l_suppkey") == values_of_interest[3][1]) - & (col("l_returnflag") == values_of_interest[3][2]) - ) -) - -df_explicit_filter = df_lineitem.filter(filter_expr) - -num_rows = df_explicit_filter.count() -print( - f"Explicit filtering has number {num_rows} rows and took {time.time() - start_time} s" -) -start_time = time.time() - -# Instead try a python UDF - - -def is_of_interest_impl( - partkey_arr: pa.Array, - suppkey_arr: pa.Array, - returnflag_arr: pa.Array, -) -> pa.Array: - result = [] - for idx, partkey_val in enumerate(partkey_arr): - partkey = partkey_val.as_py() - suppkey = suppkey_arr[idx].as_py() - returnflag = returnflag_arr[idx].as_py() - value = (partkey, suppkey, returnflag) - result.append(value in values_of_interest) - - return pa.array(result) - - -is_of_interest = udf( - is_of_interest_impl, - [pa.int64(), pa.int64(), pa.utf8()], - pa.bool_(), - "stable", -) - -df_udf_filter = df_lineitem.filter( - is_of_interest(col("l_partkey"), col("l_suppkey"), col("l_returnflag")) -) - -num_rows = df_udf_filter.count() -print(f"UDF filtering has number {num_rows} rows and took {time.time() - start_time} s") -start_time = time.time() - -# Now use a user defined function but lean on the built in pyarrow array -# functions so we never convert rows to python objects. 
- -# To see other pyarrow compute functions see -# https://arrow.apache.org/docs/python/api/compute.html -# -# It is important that the number of rows in the returned array -# matches the original array, so we cannot use functions like -# filtered_partkey_arr.filter(filtered_suppkey_arr). - - -def udf_using_pyarrow_compute_impl( - partkey_arr: pa.Array, - suppkey_arr: pa.Array, - returnflag_arr: pa.Array, -) -> pa.Array: - results = None - for partkey, suppkey, returnflag in values_of_interest: - filtered_partkey_arr = pc.equal(partkey_arr, partkey) - filtered_suppkey_arr = pc.equal(suppkey_arr, suppkey) - filtered_returnflag_arr = pc.equal(returnflag_arr, returnflag) - - resultant_arr = pc.and_(filtered_partkey_arr, filtered_suppkey_arr) - resultant_arr = pc.and_(resultant_arr, filtered_returnflag_arr) - - results = resultant_arr if results is None else pc.or_(results, resultant_arr) - - return results - - -udf_using_pyarrow_compute = udf( - udf_using_pyarrow_compute_impl, - [pa.int64(), pa.int64(), pa.utf8()], - pa.bool_(), - "stable", -) - -df_udf_pyarrow_compute = df_lineitem.filter( - udf_using_pyarrow_compute(col("l_partkey"), col("l_suppkey"), col("l_returnflag")) -) - -num_rows = df_udf_pyarrow_compute.count() -print( - f"UDF filtering using pyarrow compute has number {num_rows} rows and took {time.time() - start_time} s" -) -start_time = time.time() diff --git a/examples/python-udf.py b/examples/python-udf.py deleted file mode 100644 index 1c08acd1a..000000000 --- a/examples/python-udf.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -from datafusion import SessionContext, udf -from datafusion import functions as f - - -def is_null(array: pa.Array) -> pa.Array: - return array.is_null() - - -is_null_arr = udf(is_null, [pa.int64()], pa.bool_(), "stable") - -# create a context -ctx = SessionContext() - -# create a RecordBatch and a new DataFrame from it -batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], -) -df = ctx.create_dataframe([[batch]]) - -df = df.select(is_null_arr(f.col("a"))) - -result = df.collect()[0] - -assert result.column(0) == pa.array([False] * 3) diff --git a/examples/python-udwf.py b/examples/python-udwf.py deleted file mode 100644 index 645ded188..000000000 --- a/examples/python-udwf.py +++ /dev/null @@ -1,274 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import datafusion -import pyarrow as pa -from datafusion import col, lit, udwf -from datafusion import functions as f -from datafusion.expr import WindowFrame -from datafusion.user_defined import WindowEvaluator - -# This example creates five different examples of user defined window functions in order -# to demonstrate the variety of ways a user may need to implement. - - -class ExponentialSmoothDefault(WindowEvaluator): - """Create a running smooth operation across an entire partition at once.""" - - def __init__(self, alpha: float) -> None: - self.alpha = alpha - - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: - results = [] - curr_value = 0.0 - values = values[0] - for idx in range(num_rows): - if idx == 0: - curr_value = values[idx].as_py() - else: - curr_value = values[idx].as_py() * self.alpha + curr_value * ( - 1.0 - self.alpha - ) - results.append(curr_value) - - return pa.array(results) - - -class SmoothBoundedFromPreviousRow(WindowEvaluator): - """Smooth over from the previous to current row only.""" - - def __init__(self, alpha: float) -> None: - self.alpha = alpha - - def supports_bounded_execution(self) -> bool: - return True - - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 - # Override the default range of current row since uses_window_frame is False - # So for the purpose of this test we just smooth from the previous row to - # current. 
- if idx == 0: - return (0, 0) - return (idx - 1, idx) - - def evaluate( - self, values: list[pa.Array], eval_range: tuple[int, int] - ) -> pa.Scalar: - (start, stop) = eval_range - curr_value = 0.0 - values = values[0] - for idx in range(start, stop + 1): - if idx == start: - curr_value = values[idx].as_py() - else: - curr_value = values[idx].as_py() * self.alpha + curr_value * ( - 1.0 - self.alpha - ) - return pa.scalar(curr_value).cast(pa.float64()) - - -class SmoothAcrossRank(WindowEvaluator): - """Smooth over the rank from the previous rank to current.""" - - def __init__(self, alpha: float) -> None: - self.alpha = alpha - - def include_rank(self) -> bool: - return True - - def evaluate_all_with_rank( - self, num_rows: int, ranks_in_partition: list[tuple[int, int]] - ) -> pa.Array: - results = [] - for idx in range(num_rows): - if idx == 0: - prior_value = 1.0 - matching_row = [ - i - for i in range(len(ranks_in_partition)) - if ranks_in_partition[i][0] <= idx and ranks_in_partition[i][1] > idx - ][0] + 1 - curr_value = matching_row * self.alpha + prior_value * (1.0 - self.alpha) - results.append(curr_value) - prior_value = matching_row - - return pa.array(results) - - -class ExponentialSmoothFrame(WindowEvaluator): - "Find the value across an entire frame using exponential smoothing" - - def __init__(self, alpha: float) -> None: - self.alpha = alpha - - def uses_window_frame(self) -> bool: - return True - - def evaluate( - self, values: list[pa.Array], eval_range: tuple[int, int] - ) -> pa.Scalar: - (start, stop) = eval_range - curr_value = 0.0 - if len(values) > 1: - order_by = values[1] # noqa: F841 - values = values[0] - else: - values = values[0] - for idx in range(start, stop): - if idx == start: - curr_value = values[idx].as_py() - else: - curr_value = values[idx].as_py() * self.alpha + curr_value * ( - 1.0 - self.alpha - ) - return pa.scalar(curr_value).cast(pa.float64()) - - -class SmoothTwoColumn(WindowEvaluator): - """Smooth once column based on a 
condition of another column. - - If the second column is above a threshold, then smooth over the first column from - the previous and next rows. - """ - - def __init__(self, alpha: float) -> None: - self.alpha = alpha - - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: - results = [] - values_a = values[0] - values_b = values[1] - for idx in range(num_rows): - if not values_b[idx].is_valid: - if idx == 0: - results.append(values_a[1].cast(pa.float64())) - elif idx == num_rows - 1: - results.append(values_a[num_rows - 2].cast(pa.float64())) - else: - results.append( - pa.scalar( - values_a[idx - 1].as_py() * self.alpha - + values_a[idx + 1].as_py() * (1.0 - self.alpha) - ) - ) - else: - results.append(values_a[idx].cast(pa.float64())) - - return pa.array(results) - - -# create a context -ctx = datafusion.SessionContext() - -# create a RecordBatch and a new DataFrame from it -batch = pa.RecordBatch.from_arrays( - [ - pa.array([1.0, 2.1, 2.9, 4.0, 5.1, 6.0, 6.9, 8.0]), - pa.array([1, 2, None, 4, 5, 6, None, 8]), - pa.array(["A", "A", "A", "A", "A", "B", "B", "B"]), - ], - names=["a", "b", "c"], -) -df = ctx.create_dataframe([[batch]]) - -exp_smooth = udwf( - lambda: ExponentialSmoothDefault(0.9), - pa.float64(), - pa.float64(), - volatility="immutable", -) - -smooth_two_row = udwf( - lambda: SmoothBoundedFromPreviousRow(0.9), - pa.float64(), - pa.float64(), - volatility="immutable", -) - -smooth_rank = udwf( - lambda: SmoothAcrossRank(0.9), - pa.float64(), - pa.float64(), - volatility="immutable", -) - -smooth_frame = udwf( - lambda: ExponentialSmoothFrame(0.9), - pa.float64(), - pa.float64(), - volatility="immutable", - name="smooth_frame", -) - -smooth_two_col = udwf( - lambda: SmoothTwoColumn(0.9), - [pa.float64(), pa.int64()], - pa.float64(), - volatility="immutable", -) - -# These are done with separate statements instead of one large `select` because that will -# attempt to combine the window operations and our defined UDFs do not all 
support that. -( - df.with_column("exp_smooth", exp_smooth(col("a"))) - .with_column("smooth_prior_row", smooth_two_row(col("a"))) - .with_column("smooth_rank", smooth_rank(col("a")).order_by(col("c")).build()) - .with_column("smooth_two_col", smooth_two_col(col("a"), col("b"))) - .with_column( - "smooth_frame", - smooth_frame(col("a")).window_frame(WindowFrame("rows", None, 0)).build(), - ) - .select( - "a", - "b", - "c", - "exp_smooth", - "smooth_prior_row", - "smooth_rank", - "smooth_two_col", - "smooth_frame", - ) -).show() - -assert df.select(f.round(exp_smooth(col("a")), lit(3))).collect()[0].column( - 0 -) == pa.array([1, 1.99, 2.809, 3.881, 4.978, 5.898, 6.8, 7.88]) - - -assert df.select(f.round(smooth_two_row(col("a")), lit(3))).collect()[0].column( - 0 -) == pa.array([1.0, 1.99, 2.82, 3.89, 4.99, 5.91, 6.81, 7.89]) - - -assert df.select(smooth_rank(col("a")).order_by(col("c")).build()).collect()[0].column( - 0 -) == pa.array([1, 1, 1, 1, 1, 1.9, 2.0, 2.0]) - - -assert df.select(smooth_two_col(col("a"), col("b"))).collect()[0].column(0) == pa.array( - [1, 2.1, 2.29, 4, 5.1, 6, 6.2, 8.0] -) - - -assert df.select( - f.round( - smooth_frame(col("a")).window_frame(WindowFrame("rows", None, 0)).build(), - lit(3), - ) -).collect()[0].column(0) == pa.array([1, 1.99, 2.809, 3.881, 4.978, 5.898, 6.8, 7.88]) diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py deleted file mode 100644 index 9cfe8a62b..000000000 --- a/examples/query-pyarrow-data.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datafusion -import pyarrow as pa -from datafusion import col - -# create a context -ctx = datafusion.SessionContext() - -# create a RecordBatch and a new DataFrame from it -batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], -) -df = ctx.create_dataframe([[batch]]) - -# create a new statement -df = df.select( - col("a") + col("b"), - col("a") - col("b"), -) - -# execute and collect the first (and only) batch -result = df.collect()[0] - -assert result.column(0) == pa.array([5, 7, 9]) -assert result.column(1) == pa.array([-3, -3, -3]) diff --git a/examples/sql-parquet-s3.py b/examples/sql-parquet-s3.py deleted file mode 100644 index 866e2ac68..000000000 --- a/examples/sql-parquet-s3.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import os - -import datafusion -from datafusion.object_store import AmazonS3 - -region = "us-east-1" -bucket_name = "yellow-trips" - -s3 = AmazonS3( - bucket_name=bucket_name, - region=region, - access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), -) - -ctx = datafusion.SessionContext() -path = f"s3://{bucket_name}/" -ctx.register_object_store("s3://", s3, None) - -ctx.register_parquet("trips", path) - -df = ctx.sql("select count(passenger_count) from trips") -df.show() diff --git a/examples/sql-parquet.py b/examples/sql-parquet.py deleted file mode 100644 index 3cc9fbd5a..000000000 --- a/examples/sql-parquet.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from datafusion import SessionContext - -ctx = SessionContext() -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") -df = ctx.sql( - "select passenger_count, count(*) from taxi where passenger_count is not null group by passenger_count order by passenger_count" -) -df.show() diff --git a/examples/sql-to-pandas.py b/examples/sql-to-pandas.py deleted file mode 100644 index 34f7bde1b..000000000 --- a/examples/sql-to-pandas.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from datafusion import SessionContext - -# Create a DataFusion context -ctx = SessionContext() - -# Register table with context -ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") - -# Execute SQL -df = ctx.sql( - "select passenger_count, count(*) " - "from taxi " - "where passenger_count is not null " - "group by passenger_count " - "order by passenger_count" -) - -# convert to Pandas -pandas_df = df.to_pandas() - -# create a chart -fig = pandas_df.plot( - kind="bar", title="Trip Count by Number of Passengers" -).get_figure() -fig.savefig("chart.png") diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py deleted file mode 100644 index f42bbdc23..000000000 --- a/examples/sql-using-python-udaf.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -from datafusion import Accumulator, SessionContext, udaf - - -# Define a user-defined aggregation function (UDAF) -class MyAccumulator(Accumulator): - """ - Interface of a user-defined accumulation. - """ - - def __init__(self) -> None: - self._sum = pa.scalar(0.0) - - def update(self, values: list[pa.Array]) -> None: - # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py()) - - def merge(self, states: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states[0]).as_py()) - - def state(self) -> list[pa.Array]: - return [self._sum] - - def evaluate(self) -> pa.Scalar: - return self._sum - - -my_udaf = udaf( - MyAccumulator, - pa.float64(), - pa.float64(), - [pa.float64()], - "stable", - # This will be the name of the UDAF in SQL - # If not specified it will by default the same as accumulator class name - name="my_accumulator", -) - -# Create a context -ctx = SessionContext() - -# Create a datafusion DataFrame from a Python dictionary -source_df = ctx.from_pydict({"a": [1, 1, 3], "b": [4, 5, 6]}, name="t") -# Dataframe: -# +---+---+ -# | a | b | -# +---+---+ -# | 1 | 4 | -# | 1 | 5 | -# | 3 | 6 | -# +---+---+ - -# Register UDF for use in SQL -ctx.register_udaf(my_udaf) - -# Query the DataFrame using SQL -result_df = ctx.sql( - "select a, my_accumulator(b) as b_aggregated from t group by a order by a" -) -# Dataframe: -# +---+--------------+ -# | a | b_aggregated | -# +---+--------------+ -# | 1 | 9 | -# | 3 | 6 | -# +---+--------------+ -assert result_df.to_pydict()["a"] == [1, 3] -assert result_df.to_pydict()["b_aggregated"] == [9, 6] diff --git a/examples/sql-using-python-udf.py b/examples/sql-using-python-udf.py deleted file mode 100644 index 2f0a0b67d..000000000 --- a/examples/sql-using-python-udf.py +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -from datafusion import SessionContext, udf - - -# Define a user-defined function (UDF) -def is_null(array: pa.Array) -> pa.Array: - return array.is_null() - - -is_null_arr = udf( - is_null, - [pa.int64()], - pa.bool_(), - "stable", - # This will be the name of the UDF in SQL - # If not specified it will by default the same as Python function name - name="is_null", -) - -# Create a context -ctx = SessionContext() - -# Create a datafusion DataFrame from a Python dictionary -ctx.from_pydict({"a": [1, 2, 3], "b": [4, None, 6]}, name="t") -# Dataframe: -# +---+---+ -# | a | b | -# +---+---+ -# | 1 | 4 | -# | 2 | | -# | 3 | 6 | -# +---+---+ - -# Register UDF for use in SQL -ctx.register_udf(is_null_arr) - -# Query the DataFrame using SQL -result_df = ctx.sql("select a, is_null(b) as b_is_null from t") -# Dataframe: -# +---+-----------+ -# | a | b_is_null | -# +---+-----------+ -# | 1 | false | -# | 2 | true | -# | 3 | false | -# +---+-----------+ -assert result_df.to_pydict()["b_is_null"] == [False, True, False] diff --git a/examples/substrait.py b/examples/substrait.py deleted file mode 100644 index fa6f77912..000000000 --- a/examples/substrait.py +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from datafusion import SessionContext -from datafusion import substrait as ss - -# Create a DataFusion context -ctx = SessionContext() - -# Register table with context -ctx.register_csv("aggregate_test_data", "./testing/data/csv/aggregate_test_100.csv") - -substrait_plan = ss.Serde.serialize_to_plan("SELECT * FROM aggregate_test_data", ctx) -# type(substrait_plan) -> - -# Encode it to bytes -substrait_bytes = substrait_plan.encode() -# type(substrait_bytes) -> , at this point the bytes can be distributed to file, network, etc safely -# where they could subsequently be deserialized on the receiving end. - -# Alternative serialization approaches -# type(substrait_bytes) -> , at this point the bytes can be distributed to file, network, etc safely -# where they could subsequently be deserialized on the receiving end. -substrait_bytes = ss.Serde.serialize_bytes("SELECT * FROM aggregate_test_data", ctx) - -# Imagine here bytes would be read from network, file, etc ... 
for example brevity this is omitted and variable is simply reused -# type(substrait_plan) -> -substrait_plan = ss.Serde.deserialize_bytes(substrait_bytes) - -# type(df_logical_plan) -> -df_logical_plan = ss.Consumer.from_substrait_plan(ctx, substrait_plan) - -# Back to Substrait Plan just for demonstration purposes -# type(substrait_plan) -> -substrait_plan = ss.Producer.to_substrait_plan(df_logical_plan, ctx) diff --git a/examples/tpch/.gitignore b/examples/tpch/.gitignore deleted file mode 100644 index 9e67bd47d..000000000 --- a/examples/tpch/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -data - diff --git a/examples/tpch/README.md b/examples/tpch/README.md deleted file mode 100644 index 7c52c8230..000000000 --- a/examples/tpch/README.md +++ /dev/null @@ -1,57 +0,0 @@ - - -# DataFusion Python Examples for TPC-H - -These examples reproduce the problems listed in the Transaction Process Council -TPC-H benchmark. The purpose of these examples is to demonstrate how to use -different aspects of Data Fusion and not necessarily geared towards creating the -most performant queries possible. Within each example is a description of the -problem. For users who are familiar with SQL style commands, you can compare the -approaches in these examples with those listed in the specification. - -- https://www.tpc.org/tpch/ - -The examples provided are based on version 2.18.0 of the TPC-H specification. - -## Data Setup - -To run these examples, you must first generate a dataset. The `dbgen` tool -provided by TPC can create datasets of arbitrary scale. For testing it is -typically sufficient to create a 1 gigabyte dataset. For convenience, this -repository has a script which uses docker to create this dataset. From the -`benchmarks/tpch` directory execute the following script. - -```bash -./tpch-gen.sh 1 -``` - -The examples provided use parquet files for the tables generated by `dbgen`. 
-A python script is provided to convert the text files from `dbgen` into parquet -files expected by the examples. From the `examples/tpch` directory you can -execute the following command to create the necessary parquet files. - -```bash -python convert_data_to_parquet.py -``` - -## Description of Examples - -For easier access, a description of the techniques demonstrated in each file -is in the README.md file in the `examples` directory. diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py deleted file mode 100644 index 780fcf5e5..000000000 --- a/examples/tpch/_tests.py +++ /dev/null @@ -1,128 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from importlib import import_module - -import pyarrow as pa -import pytest -from datafusion import DataFrame, col, lit -from datafusion import functions as F -from util import get_answer_file - - -def df_selection(col_name, col_type): - if col_type == pa.float64(): - return F.round(col(col_name), lit(2)).alias(col_name) - if isinstance(col_type, pa.Decimal128Type): - return F.round(col(col_name).cast(pa.float64()), lit(2)).alias(col_name) - if col_type == pa.string() or col_type == pa.string_view(): - return F.trim(col(col_name)).alias(col_name) - return col(col_name) - - -def load_schema(col_name, col_type): - if col_type == pa.int64() or col_type == pa.int32(): - return col_name, pa.string() - if isinstance(col_type, pa.Decimal128Type): - return col_name, pa.float64() - return col_name, col_type - - -def expected_selection(col_name, col_type): - if col_type == pa.int64() or col_type == pa.int32(): - return F.trim(col(col_name)).cast(col_type).alias(col_name) - if col_type == pa.string() or col_type == pa.string_view(): - return F.trim(col(col_name)).alias(col_name) - return col(col_name) - - -def selections_and_schema(original_schema): - columns = [(c, original_schema.field(c).type) for c in original_schema.names] - - df_selections = [df_selection(c, t) for (c, t) in columns] - expected_schema = [load_schema(c, t) for (c, t) in columns] - expected_selections = [expected_selection(c, t) for (c, t) in columns] - - return (df_selections, expected_schema, expected_selections) - - -def check_q17(df): - raw_value = float(df.collect()[0]["avg_yearly"][0].as_py()) - value = round(raw_value, 2) - assert abs(value - 348406.05) < 0.001 - - -@pytest.mark.parametrize( - ("query_code", "answer_file"), - [ - ("q01_pricing_summary_report", "q1"), - ("q02_minimum_cost_supplier", "q2"), - ("q03_shipping_priority", "q3"), - ("q04_order_priority_checking", "q4"), - ("q05_local_supplier_volume", "q5"), - ("q06_forecasting_revenue_change", "q6"), - ("q07_volume_shipping", "q7"), - 
("q08_market_share", "q8"), - ("q09_product_type_profit_measure", "q9"), - ("q10_returned_item_reporting", "q10"), - ("q11_important_stock_identification", "q11"), - ("q12_ship_mode_order_priority", "q12"), - ("q13_customer_distribution", "q13"), - ("q14_promotion_effect", "q14"), - ("q15_top_supplier", "q15"), - ("q16_part_supplier_relationship", "q16"), - ("q17_small_quantity_order", "q17"), - ("q18_large_volume_customer", "q18"), - ("q19_discounted_revenue", "q19"), - ("q20_potential_part_promotion", "q20"), - ("q21_suppliers_kept_orders_waiting", "q21"), - ("q22_global_sales_opportunity", "q22"), - ], -) -def test_tpch_query_vs_answer_file(query_code: str, answer_file: str) -> None: - module = import_module(query_code) - df: DataFrame = module.df - - # Treat q17 as a special case. The answer file does not match the spec. - # Running at scale factor 1, we have manually verified this result does - # match the expected value. - if answer_file == "q17": - return check_q17(df) - - (df_selections, expected_schema, expected_selections) = selections_and_schema( - df.schema() - ) - - df = df.select(*df_selections) - - read_schema = pa.schema(expected_schema) - - df_expected = module.ctx.read_csv( - get_answer_file(answer_file), - schema=read_schema, - delimiter="|", - file_extension=".out", - ) - - df_expected = df_expected.select(*expected_selections) - - cols = list(read_schema.names) - - assert df.join(df_expected, on=cols, how="anti").count() == 0 - assert df.count() == df_expected.count() - - return None diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py deleted file mode 100644 index af554c39e..000000000 --- a/examples/tpch/convert_data_to_parquet.py +++ /dev/null @@ -1,142 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -This is a utility function that will consumer the data generated by dbgen from TPC-H and convert -it into a parquet file with the column names as expected by the TPC-H specification. It assumes -the data generated resides in a path ../../benchmarks/tpch/data relative to the current file, -as will be generated by the script provided in this repository. -""" - -from pathlib import Path - -import datafusion -import pyarrow as pa - -ctx = datafusion.SessionContext() - -all_schemas = {} - -all_schemas["customer"] = [ - ("C_CUSTKEY", pa.int64()), - ("C_NAME", pa.string()), - ("C_ADDRESS", pa.string()), - ("C_NATIONKEY", pa.int64()), - ("C_PHONE", pa.string()), - ("C_ACCTBAL", pa.decimal128(15, 2)), - ("C_MKTSEGMENT", pa.string()), - ("C_COMMENT", pa.string()), -] - -all_schemas["lineitem"] = [ - ("L_ORDERKEY", pa.int64()), - ("L_PARTKEY", pa.int64()), - ("L_SUPPKEY", pa.int64()), - ("L_LINENUMBER", pa.int32()), - ("L_QUANTITY", pa.decimal128(15, 2)), - ("L_EXTENDEDPRICE", pa.decimal128(15, 2)), - ("L_DISCOUNT", pa.decimal128(15, 2)), - ("L_TAX", pa.decimal128(15, 2)), - ("L_RETURNFLAG", pa.string()), - ("L_LINESTATUS", pa.string()), - ("L_SHIPDATE", pa.date32()), - ("L_COMMITDATE", pa.date32()), - ("L_RECEIPTDATE", pa.date32()), - ("L_SHIPINSTRUCT", pa.string()), - ("L_SHIPMODE", pa.string()), - ("L_COMMENT", pa.string()), -] - -all_schemas["nation"] = [ - ("N_NATIONKEY", pa.int64()), - 
("N_NAME", pa.string()), - ("N_REGIONKEY", pa.int64()), - ("N_COMMENT", pa.string()), -] - -all_schemas["orders"] = [ - ("O_ORDERKEY", pa.int64()), - ("O_CUSTKEY", pa.int64()), - ("O_ORDERSTATUS", pa.string()), - ("O_TOTALPRICE", pa.decimal128(15, 2)), - ("O_ORDERDATE", pa.date32()), - ("O_ORDERPRIORITY", pa.string()), - ("O_CLERK", pa.string()), - ("O_SHIPPRIORITY", pa.int32()), - ("O_COMMENT", pa.string()), -] - -all_schemas["part"] = [ - ("P_PARTKEY", pa.int64()), - ("P_NAME", pa.string()), - ("P_MFGR", pa.string()), - ("P_BRAND", pa.string()), - ("P_TYPE", pa.string()), - ("P_SIZE", pa.int32()), - ("P_CONTAINER", pa.string()), - ("P_RETAILPRICE", pa.decimal128(15, 2)), - ("P_COMMENT", pa.string()), -] - -all_schemas["partsupp"] = [ - ("PS_PARTKEY", pa.int64()), - ("PS_SUPPKEY", pa.int64()), - ("PS_AVAILQTY", pa.int32()), - ("PS_SUPPLYCOST", pa.decimal128(15, 2)), - ("PS_COMMENT", pa.string()), -] - -all_schemas["region"] = [ - ("r_REGIONKEY", pa.int64()), - ("r_NAME", pa.string()), - ("r_COMMENT", pa.string()), -] - -all_schemas["supplier"] = [ - ("S_SUPPKEY", pa.int64()), - ("S_NAME", pa.string()), - ("S_ADDRESS", pa.string()), - ("S_NATIONKEY", pa.int32()), - ("S_PHONE", pa.string()), - ("S_ACCTBAL", pa.decimal128(15, 2)), - ("S_COMMENT", pa.string()), -] - -curr_dir = Path(__file__).resolve().parent -for filename, curr_schema_val in all_schemas.items(): - # For convenience, go ahead and convert the schema column names to lowercase - curr_schema = [(s[0].lower(), s[1]) for s in curr_schema_val] - - # Pre-collect the output columns so we can ignore the null field we add - # in to handle the trailing | in the file - output_cols = [r[0] for r in curr_schema] - - curr_schema = [pa.field(r[0], r[1], nullable=False) for r in curr_schema] - - # Trailing | requires extra field for in processing - curr_schema.append(("some_null", pa.null())) - - schema = pa.schema(curr_schema) - - source_file = (curr_dir / f"../../benchmarks/tpch/data/{filename}.csv").resolve() - 
dest_file = (curr_dir / f"./data/{filename}.parquet").resolve() - - df = ctx.read_csv(source_file, schema=schema, has_header=False, delimiter="|") - - df = df.select(*output_cols) - - df.write_parquet(dest_file, compression="snappy") diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py deleted file mode 100644 index 3f97f00dc..000000000 --- a/examples/tpch/q01_pricing_summary_report.py +++ /dev/null @@ -1,90 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 1: - -The Pricing Summary Report Query provides a summary pricing report for all lineitems shipped as of -a given date. The date is within 60 - 120 days of the greatest ship date contained in the database. -The query lists totals for extended price, discounted extended price, discounted extended price -plus tax, average quantity, average extended price, and average discount. These aggregates are -grouped by RETURNFLAG and LINESTATUS, and listed in ascending order of RETURNFLAG and LINESTATUS. -A count of the number of lineitems in each group is included. 
- -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -ctx = SessionContext() - -df = ctx.read_parquet(get_data_path("lineitem.parquet")) - -# It may be that the date can be hard coded, based on examples shown. -# This approach will work with any date range in the provided data set. - -greatest_ship_date = df.aggregate( - [], [F.max(col("l_shipdate")).alias("shipdate")] -).collect()[0]["shipdate"][0] - -# From the given problem, this is how close to the last date in the database we -# want to report results for. It should be between 60-120 days before the end. -DAYS_BEFORE_FINAL = 90 - -interval = pa.scalar((0, DAYS_BEFORE_FINAL, 0), type=pa.month_day_nano_interval()) - -print("Final date in database:", greatest_ship_date) - -# Filter data to the dates of interest -df = df.filter(col("l_shipdate") <= lit(greatest_ship_date) - lit(interval)) - -# Aggregate the results - -df = df.aggregate( - [col("l_returnflag"), col("l_linestatus")], - [ - F.sum(col("l_quantity")).alias("sum_qty"), - F.sum(col("l_extendedprice")).alias("sum_base_price"), - F.sum(col("l_extendedprice") * (lit(1) - col("l_discount"))).alias( - "sum_disc_price" - ), - F.sum( - col("l_extendedprice") - * (lit(1) - col("l_discount")) - * (lit(1) + col("l_tax")) - ).alias("sum_charge"), - F.avg(col("l_quantity")).alias("avg_qty"), - F.avg(col("l_extendedprice")).alias("avg_price"), - F.avg(col("l_discount")).alias("avg_disc"), - F.count(col("l_returnflag")).alias( - "count_order" - ), # Counting any column should return same result - ], -) - -# Sort per the expected result - -df = df.sort(col("l_returnflag").sort(), col("l_linestatus").sort()) - -# Note: There appears to be a discrepancy between what is returned here and what is in the generated 
-# answers file for the case of return flag N and line status O, but I did not investigate further. - -df.show() diff --git a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py deleted file mode 100644 index 7390d0892..000000000 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ /dev/null @@ -1,146 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 2: - -The Minimum Cost Supplier Query finds, in a given region, for each part of a certain type and size, -the supplier who can supply it at minimum cost. If several suppliers in that region offer the -desired part type and size at the same (minimum) cost, the query lists the parts from suppliers with -the 100 highest account balances. For each supplier, the query lists the supplier's account balance, -name and nation; the part's number and manufacturer; the supplier's address, phone number and -comment information. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -import datafusion -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -# This is the part we're looking for. Values selected here differ from the spec in order to run -# unit tests on a small data set. -SIZE_OF_INTEREST = 15 -TYPE_OF_INTEREST = "BRASS" -REGION_OF_INTEREST = "EUROPE" - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select( - "p_partkey", "p_mfgr", "p_type", "p_size" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_acctbal", - "s_name", - "s_address", - "s_phone", - "s_comment", - "s_nationkey", - "s_suppkey", -) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( - "ps_partkey", "ps_suppkey", "ps_supplycost" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_regionkey", "n_name" -) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select( - "r_regionkey", "r_name" -) - -# Filter down parts. Part names contain the type of interest, so we can use strpos to find where -# in the p_type column the word is. `strpos` will return 0 if not found, otherwise the position -# in the string where it is located. - -df_part = df_part.filter( - F.strpos(col("p_type"), lit(TYPE_OF_INTEREST)) > lit(0) -).filter(col("p_size") == lit(SIZE_OF_INTEREST)) - -# Filter regions down to the one of interest - -df_region = df_region.filter(col("r_name") == lit(REGION_OF_INTEREST)) - -# Now that we have the region, find suppliers in that region. Suppliers are tied to their nation -# and nations are tied to the region. 
- -df_nation = df_nation.join( - df_region, left_on=["n_regionkey"], right_on=["r_regionkey"], how="inner" -) -df_supplier = df_supplier.join( - df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner" -) - -# Now that we know who the potential suppliers are for the part, we can limit out part -# supplies table down. We can further join down to the specific parts we've identified -# as matching the request - -df = df_partsupp.join( - df_supplier, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="inner" -) - -# Locate the minimum cost across all suppliers. There are multiple ways you could do this, -# but one way is to create a window function across all suppliers, find the minimum, and -# create a column of that value. We can then filter down any rows for which the cost and -# minimum do not match. - -# The default window frame as of 5/6/2024 is from unbounded preceding to the current row. -# We want to evaluate the entire data frame, so we specify this. -window_frame = datafusion.WindowFrame("rows", None, None) -df = df.with_column( - "min_cost", - F.window( - "min", - [col("ps_supplycost")], - partition_by=[col("ps_partkey")], - window_frame=window_frame, - ), -) - -df = df.filter(col("min_cost") == col("ps_supplycost")) - -df = df.join(df_part, left_on=["ps_partkey"], right_on=["p_partkey"], how="inner") - -# From the problem statement, these are the values we wish to output - -df = df.select( - "s_acctbal", - "s_name", - "n_name", - "p_partkey", - "p_mfgr", - "s_address", - "s_phone", - "s_comment", -) - -# Sort and display 100 entries -df = df.sort( - col("s_acctbal").sort(ascending=False), - col("n_name").sort(), - col("s_name").sort(), - col("p_partkey").sort(), -) - -df = df.limit(100) - -# Show results - -df.show() diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py deleted file mode 100644 index fc1231e0a..000000000 --- a/examples/tpch/q03_shipping_priority.py +++ /dev/null @@ -1,88 +0,0 @@ -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 3: - -The Shipping Priority Query retrieves the shipping priority and potential revenue, defined as the -sum of l_extendedprice * (1-l_discount), of the orders having the largest revenue among those that -had not been shipped as of a given date. Orders are listed in decreasing order of revenue. If more -than 10 unshipped orders exist, only the 10 orders with the largest revenue are listed. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -SEGMENT_OF_INTEREST = "BUILDING" -DATE_OF_INTEREST = "1995-03-15" - -# Load the dataframes we need - -ctx = SessionContext() - -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_mktsegment", "c_custkey" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderdate", "o_shippriority", "o_custkey", "o_orderkey" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_extendedprice", "l_discount", "l_shipdate" -) - -# Limit dataframes to the rows of interest - -df_customer = df_customer.filter(col("c_mktsegment") == lit(SEGMENT_OF_INTEREST)) -df_orders = df_orders.filter(col("o_orderdate") < lit(DATE_OF_INTEREST)) -df_lineitem = df_lineitem.filter(col("l_shipdate") > lit(DATE_OF_INTEREST)) - -# Join all 3 dataframes - -df = df_customer.join( - df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" -).join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") - -# Compute the revenue - -df = df.aggregate( - [col("l_orderkey")], - [ - F.first_value(col("o_orderdate")).alias("o_orderdate"), - F.first_value(col("o_shippriority")).alias("o_shippriority"), - F.sum(col("l_extendedprice") * (lit(1.0) - col("l_discount"))).alias("revenue"), - ], -) - -# Sort by priority - -df = df.sort(col("revenue").sort(ascending=False), col("o_orderdate").sort()) - -# Only return 10 results - -df = df.limit(10) - -# Change the order that the columns are reported in just to match the spec - -df = df.select("l_orderkey", "revenue", "o_orderdate", "o_shippriority") - -# Show result - -df.show() diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py deleted file mode 100644 index 426338aea..000000000 --- a/examples/tpch/q04_order_priority_checking.py +++ /dev/null @@ -1,83 +0,0 @@ -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 4: - -The Order Priority Checking Query counts the number of orders ordered in a given quarter of a given -year in which at least one lineitem was received by the customer later than its committed date. The -query lists the count of such orders for each order priority sorted in ascending priority order. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -# Ideally we could put 3 months into the interval. See note below. 
-INTERVAL_DAYS = 92 -DATE_OF_INTEREST = "1993-07-01" - -# Load the dataframes we need - -ctx = SessionContext() - -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderdate", "o_orderpriority", "o_orderkey" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_commitdate", "l_receiptdate" -) - -# Create a date object from the string -date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() - -interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) - -# Limit results to cases where commitment date before receipt date -# Aggregate the results so we only get one row to join with the order table. -# Alternately, and likely more idiomatic is instead of `.aggregate` you could -# do `.select("l_orderkey").distinct()`. The goal here is to show -# multiple examples of how to use Data Fusion. -df_lineitem = df_lineitem.filter(col("l_commitdate") < col("l_receiptdate")).aggregate( - [col("l_orderkey")], [] -) - -# Limit orders to date range of interest -df_orders = df_orders.filter(col("o_orderdate") >= lit(date)).filter( - col("o_orderdate") < lit(date) + lit(interval) -) - -# Perform the join to find only orders for which there are lineitems outside of expected range -df = df_orders.join( - df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner" -) - -# Based on priority, find the number of entries -df = df.aggregate( - [col("o_orderpriority")], [F.count(col("o_orderpriority")).alias("order_count")] -) - -# Sort the results -df = df.sort(col("o_orderpriority").sort()) - -df.show() diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py deleted file mode 100644 index fa2b01dea..000000000 --- a/examples/tpch/q05_local_supplier_volume.py +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 5: - -The Local Supplier Volume Query lists for each nation in a region the revenue volume that resulted -from lineitem transactions in which the customer ordering parts and the supplier filling them were -both within that nation. The query is run in order to determine whether to institute local -distribution centers in a given region. The query considers only parts ordered in a given year. The -query displays the nations and revenue volume in descending order by revenue. Revenue volume for all -qualifying lineitems in a particular nation is defined as sum(l_extendedprice * (1 - l_discount)). - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -DATE_OF_INTEREST = "1994-01-01" -INTERVAL_DAYS = 365 -REGION_OF_INTEREST = "ASIA" - -date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() - -interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) - -# Load the dataframes we need - -ctx = SessionContext() - -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_custkey", "c_nationkey" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_custkey", "o_orderkey", "o_orderdate" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_suppkey", "l_extendedprice", "l_discount" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_nationkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_regionkey", "n_name" -) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select( - "r_regionkey", "r_name" -) - -# Restrict dataframes to cases of interest -df_orders = df_orders.filter(col("o_orderdate") >= lit(date)).filter( - col("o_orderdate") < lit(date) + lit(interval) -) - -df_region = df_region.filter(col("r_name") == lit(REGION_OF_INTEREST)) - -# Join all the dataframes - -df = ( - df_customer.join( - df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" - ) - .join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") - .join( - df_supplier, - left_on=["l_suppkey", "c_nationkey"], - right_on=["s_suppkey", "s_nationkey"], - how="inner", - ) - .join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") - .join(df_region, left_on=["n_regionkey"], right_on=["r_regionkey"], how="inner") -) - -# Compute the final result - -df = df.aggregate( - [col("n_name")], - 
[F.sum(col("l_extendedprice") * (lit(1.0) - col("l_discount"))).alias("revenue")], -) - -# Sort in descending order - -df = df.sort(col("revenue").sort(ascending=False)) - -df.show() diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py deleted file mode 100644 index 1de5848b1..000000000 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ /dev/null @@ -1,88 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 6: - -The Forecasting Revenue Change Query considers all the lineitems shipped in a given year with -discounts between DISCOUNT-0.01 and DISCOUNT+0.01. The query lists the amount by which the total -revenue would have increased if these discounts had been eliminated for lineitems with l_quantity -less than quantity. Note that the potential revenue increase is equal to the sum of -[l_extendedprice * l_discount] for all lineitems with discounts and quantities in the qualifying -range. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -# Variables from the example query - -DATE_OF_INTEREST = "1994-01-01" -DISCOUT = 0.06 -DELTA = 0.01 -QUANTITY = 24 - -INTERVAL_DAYS = 365 - -date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() - -interval = pa.scalar((0, INTERVAL_DAYS, 0), type=pa.month_day_nano_interval()) - -# Load the dataframes we need - -ctx = SessionContext() - -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_shipdate", "l_quantity", "l_extendedprice", "l_discount" -) - -# Filter down to lineitems of interest - -df = ( - df_lineitem.filter(col("l_shipdate") >= lit(date)) - .filter(col("l_shipdate") < lit(date) + lit(interval)) - .filter(col("l_discount") >= lit(DISCOUT) - lit(DELTA)) - .filter(col("l_discount") <= lit(DISCOUT) + lit(DELTA)) - .filter(col("l_quantity") < lit(QUANTITY)) -) - -# Add up all the "lost" revenue - -df = df.aggregate( - [], [F.sum(col("l_extendedprice") * col("l_discount")).alias("revenue")] -) - -# Show the single result. We could do a `show()` but since we want to demonstrate features of how -# to use Data Fusion, instead collect the result as a python object and print it out. - -# collect() should give a list of record batches. This is a small query, so we should get a -# single batch back, hence the index [0]. Within each record batch we only care about the -# single column result `revenue`. Since we have only one row returned because we aggregated -# over the entire dataframe, we can index it at 0. Then convert the DoubleScalar into a -# simple python object. 
- -revenue = df.collect()[0]["revenue"][0].as_py() - -# Note: the output value from this query may be dependent on the size of the database generated -print(f"Potential lost revenue: {revenue:.2f}") diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py deleted file mode 100644 index ff2f891f1..000000000 --- a/examples/tpch/q07_volume_shipping.py +++ /dev/null @@ -1,128 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 7: - -The Volume Shipping Query finds, for two given nations, the gross discounted revenues derived from -lineitems in which parts were shipped from a supplier in either nation to a customer in the other -nation during 1995 and 1996. The query lists the supplier nation, the customer nation, the year, -and the revenue from shipments that took place in that year. The query orders the answer by -Supplier nation, Customer nation, and year (all ascending). - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -# Variables of interest to query over - -nation_1 = lit("FRANCE") -nation_2 = lit("GERMANY") - -START_DATE = "1995-01-01" -END_DATE = "1996-12-31" - -start_date = lit(datetime.strptime(START_DATE, "%Y-%m-%d").date()) -end_date = lit(datetime.strptime(END_DATE, "%Y-%m-%d").date()) - - -# Load the dataframes we need - -ctx = SessionContext() - -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_nationkey" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_shipdate", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_custkey" -) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_custkey", "c_nationkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name" -) - - -# Filter to time of interest -df_lineitem = df_lineitem.filter(col("l_shipdate") >= start_date).filter( - col("l_shipdate") <= end_date -) - - -# A simpler way to do the following operation is to use a filter, but we also want to demonstrate -# how to use case statements. Here we are assigning `n_name` to be itself when it is either of -# the two nations of interest. Since there is no `otherwise()` statement, any values that do -# not match these will result in a null value and then get filtered out. 
-# -# To do the same using a simple filter would be: -# df_nation = df_nation.filter((F.col("n_name") == nation_1) | (F.col("n_name") == nation_2)) # noqa: ERA001 -df_nation = df_nation.with_column( - "n_name", - F.case(col("n_name")) - .when(nation_1, col("n_name")) - .when(nation_2, col("n_name")) - .end(), -).filter(~col("n_name").is_null()) - - -# Limit suppliers to either nation -df_supplier = df_supplier.join( - df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner" -).select(col("s_suppkey"), col("n_name").alias("supp_nation")) - -# Limit customers to either nation -df_customer = df_customer.join( - df_nation, left_on=["c_nationkey"], right_on=["n_nationkey"], how="inner" -).select(col("c_custkey"), col("n_name").alias("cust_nation")) - -# Join up all the data frames from line items, and make sure the supplier and customer are in -# different nations. -df = ( - df_lineitem.join( - df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner" - ) - .join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") - .join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") - .filter(col("cust_nation") != col("supp_nation")) -) - -# Extract out two values for every line item -df = df.with_column( - "l_year", F.datepart(lit("year"), col("l_shipdate")).cast(pa.int32()) -).with_column("volume", col("l_extendedprice") * (lit(1.0) - col("l_discount"))) - -# Aggregate the results -df = df.aggregate( - [col("supp_nation"), col("cust_nation"), col("l_year")], - [F.sum(col("volume")).alias("revenue")], -) - -# Sort based on problem statement requirements -df = df.sort(col("supp_nation").sort(), col("cust_nation").sort(), col("l_year").sort()) - -df.show() diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py deleted file mode 100644 index 4bf50efba..000000000 --- a/examples/tpch/q08_market_share.py +++ /dev/null @@ -1,178 +0,0 @@ -# Licensed to the Apache Software Foundation 
(ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 8: - -The market share for a given nation within a given region is defined as the fraction of the -revenue, the sum of [l_extendedprice * (1-l_discount)], from the products of a specified type in -that region that was supplied by suppliers from the given nation. The query determines this for the -years 1995 and 1996 presented in this order. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -supplier_nation = lit("BRAZIL") -customer_region = lit("AMERICA") -part_of_interest = lit("ECONOMY ANODIZED STEEL") - -START_DATE = "1995-01-01" -END_DATE = "1996-12-31" - -start_date = lit(datetime.strptime(START_DATE, "%Y-%m-%d").date()) -end_date = lit(datetime.strptime(END_DATE, "%Y-%m-%d").date()) - - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type") -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_nationkey" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_partkey", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_custkey", "o_orderdate" -) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_custkey", "c_nationkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name", "n_regionkey" -) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select( - "r_regionkey", "r_name" -) - -# Limit possible parts to the one specified -df_part = df_part.filter(col("p_type") == part_of_interest) - -# Limit orders to those in the specified range - -df_orders = df_orders.filter(col("o_orderdate") >= start_date).filter( - col("o_orderdate") <= end_date -) - -# Part 1: Find customers in the region - -# We want customers in region specified by region_of_interest. This will be used to compute -# the total sales of the part of interest. We want to know of those sales what fraction -# was supplied by the nation of interest. There is no guarantee that the nation of -# interest is within the region of interest. 
- -# First we find all the sales that make up the basis. - -df_regional_customers = df_region.filter(col("r_name") == customer_region) - -# After this join we have all of the possible sales nations -df_regional_customers = df_regional_customers.join( - df_nation, left_on=["r_regionkey"], right_on=["n_regionkey"], how="inner" -) - -# Now find the possible customers -df_regional_customers = df_regional_customers.join( - df_customer, left_on=["n_nationkey"], right_on=["c_nationkey"], how="inner" -) - -# Next find orders for these customers -df_regional_customers = df_regional_customers.join( - df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" -) - -# Find all line items from these orders -df_regional_customers = df_regional_customers.join( - df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner" -) - -# Limit to the part of interest -df_regional_customers = df_regional_customers.join( - df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="inner" -) - -# Compute the volume for each line item -df_regional_customers = df_regional_customers.with_column( - "volume", col("l_extendedprice") * (lit(1.0) - col("l_discount")) -) - -# Part 2: Find suppliers from the nation - -# Now that we have all of the sales of that part in the specified region, we need -# to determine which of those came from suppliers in the nation we are interested in. 
- -df_national_suppliers = df_nation.filter(col("n_name") == supplier_nation) - -# Determine the suppliers by the limited nation key we have in our single row df above -df_national_suppliers = df_national_suppliers.join( - df_supplier, left_on=["n_nationkey"], right_on=["s_nationkey"], how="inner" -) - -# When we join to the customer dataframe, we don't want to confuse other columns, so only -# select the supplier key that we need -df_national_suppliers = df_national_suppliers.select("s_suppkey") - - -# Part 3: Combine suppliers and customers and compute the market share - -# Now we can do a left outer join on the suppkey. Those line items from other suppliers -# will get a null value. We can check for the existence of this null to compute a volume -# column only from suppliers in the nation we are evaluating. - -df = df_regional_customers.join( - df_national_suppliers, left_on=["l_suppkey"], right_on=["s_suppkey"], how="left" -) - -# Use a case statement to compute the volume sold by suppliers in the nation of interest -df = df.with_column( - "national_volume", - F.case(col("s_suppkey").is_null()) - .when(lit(value=False), col("volume")) - .otherwise(lit(0.0)), -) - -df = df.with_column( - "o_year", F.datepart(lit("year"), col("o_orderdate")).cast(pa.int32()) -) - - -# Lastly, sum up the results - -df = df.aggregate( - [col("o_year")], - [ - F.sum(col("volume")).alias("volume"), - F.sum(col("national_volume")).alias("national_volume"), - ], -) - -df = df.select( - col("o_year"), (F.col("national_volume") / F.col("volume")).alias("mkt_share") -) - -df = df.sort(col("o_year").sort()) - -df.show() diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py deleted file mode 100644 index e2abbd095..000000000 --- a/examples/tpch/q09_product_type_profit_measure.py +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 9: - -The Product Type Profit Measure Query finds, for each nation and each year, the profit for all parts -ordered in that year that contain a specified substring in their names and that were filled by a -supplier in that nation. The profit is defined as the sum of -[(l_extendedprice*(1-l_discount)) - (ps_supplycost * l_quantity)] for all lineitems describing -parts in the specified line. The query lists the nations in ascending alphabetical order and, for -each nation, the year and profit in descending order by year (most recent first). - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -part_color = lit("green") - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_name") -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_nationkey" -) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( - "ps_suppkey", "ps_partkey", "ps_supplycost" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_partkey", - "l_extendedprice", - "l_discount", - "l_suppkey", - "l_orderkey", - "l_quantity", -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_custkey", "o_orderdate" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name", "n_regionkey" -) - -# Limit possible parts to the color specified -df = df_part.filter(F.strpos(col("p_name"), part_color) > lit(0)) - -# We have a series of joins that get us to limit down to the line items we need -df = df.join(df_lineitem, left_on=["p_partkey"], right_on=["l_partkey"], how="inner") -df = df.join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") -df = df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") -df = df.join( - df_partsupp, - left_on=["l_suppkey", "l_partkey"], - right_on=["ps_suppkey", "ps_partkey"], - how="inner", -) -df = df.join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") - -# Compute the intermediate values and limit down to the expressions we need -df = df.select( - col("n_name").alias("nation"), - F.datepart(lit("year"), col("o_orderdate")).cast(pa.int32()).alias("o_year"), - ( - (col("l_extendedprice") * (lit(1) - col("l_discount"))) - - (col("ps_supplycost") * col("l_quantity")) - ).alias("amount"), -) - -# Sum up 
the values by nation and year -df = df.aggregate( - [col("nation"), col("o_year")], [F.sum(col("amount")).alias("profit")] -) - -# Sort according to the problem specification -df = df.sort(col("nation").sort(), col("o_year").sort(ascending=False)) - -df.show() diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py deleted file mode 100644 index ed822e264..000000000 --- a/examples/tpch/q10_returned_item_reporting.py +++ /dev/null @@ -1,109 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 10: - -The Returned Item Reporting Query finds the top 20 customers, in terms of their effect on lost -revenue for a given quarter, who have returned parts. The query considers only parts that were -ordered in the specified quarter. The query lists the customer's name, address, nation, phone -number, account balance, comment information and revenue lost. The customers are listed in -descending order of lost revenue. Revenue lost is defined as -sum(l_extendedprice*(1-l_discount)) for all qualifying lineitems. 
- -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -DATE_START_OF_QUARTER = "1993-10-01" - -date_start_of_quarter = lit(datetime.strptime(DATE_START_OF_QUARTER, "%Y-%m-%d").date()) - -interval_one_quarter = lit(pa.scalar((0, 92, 0), type=pa.month_day_nano_interval())) - -# Load the dataframes we need - -ctx = SessionContext() - -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_custkey", - "c_nationkey", - "c_name", - "c_acctbal", - "c_address", - "c_phone", - "c_comment", -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_extendedprice", "l_discount", "l_orderkey", "l_returnflag" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_custkey", "o_orderdate" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name", "n_regionkey" -) - -# limit to returns -df_lineitem = df_lineitem.filter(col("l_returnflag") == lit("R")) - - -# Rather than aggregate by all of the customer fields as you might do looking at the specification, -# we can aggregate by o_custkey and then join in the customer data at the end. 
- -df = df_orders.filter(col("o_orderdate") >= date_start_of_quarter).filter( - col("o_orderdate") < date_start_of_quarter + interval_one_quarter -) - -df = df.join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") - -# Compute the revenue -df = df.aggregate( - [col("o_custkey")], - [F.sum(col("l_extendedprice") * (lit(1) - col("l_discount"))).alias("revenue")], -) - -# Now join in the customer data -df = df.join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") -df = df.join(df_nation, left_on=["c_nationkey"], right_on=["n_nationkey"], how="inner") - -# These are the columns the problem statement requires -df = df.select( - "c_custkey", - "c_name", - "revenue", - "c_acctbal", - "n_name", - "c_address", - "c_phone", - "c_comment", -) - -# Sort the results in descending order -df = df.sort(col("revenue").sort(ascending=False)) - -# Only return the top 20 results -df = df.limit(20) - -df.show() diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py deleted file mode 100644 index 22829ab7c..000000000 --- a/examples/tpch/q11_important_stock_identification.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 11: - -The Important Stock Identification Query finds, from scanning the available stock of suppliers -in a given nation, all the parts that represent a significant percentage of the total value of -all available parts. The query displays the part number and the value of those parts in -descending order of value. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datafusion import SessionContext, WindowFrame, col, lit -from datafusion import functions as F -from util import get_data_path - -NATION = "GERMANY" -FRACTION = 0.0001 - -# Load the dataframes we need - -ctx = SessionContext() - -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_nationkey" -) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( - "ps_supplycost", "ps_availqty", "ps_suppkey", "ps_partkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name" -) - -# limit to returns -df_nation = df_nation.filter(col("n_name") == lit(NATION)) - -# Find part supplies of within this target nation - -df = df_nation.join( - df_supplier, left_on=["n_nationkey"], right_on=["s_nationkey"], how="inner" -) - -df = df.join(df_partsupp, left_on=["s_suppkey"], right_on=["ps_suppkey"], how="inner") - - -# Compute the value of individual parts -df = df.with_column("value", col("ps_supplycost") * col("ps_availqty")) - -# Compute total value of specific parts -df = df.aggregate([col("ps_partkey")], [F.sum(col("value")).alias("value")]) - -# By default window functions go from unbounded preceding to current row, but we want -# to compute this sum across all rows -window_frame = WindowFrame("rows", None, None) - -df = df.with_column( - "total_value", 
F.window("sum", [col("value")], window_frame=window_frame) -) - -# Limit to the parts for which there is a significant value based on the fraction of the total -df = df.filter(col("value") / col("total_value") >= lit(FRACTION)) - -# We only need to report on these two columns -df = df.select("ps_partkey", "value") - -# Sort in descending order of value -df = df.sort(col("value").sort(ascending=False)) - -df.show() diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py deleted file mode 100644 index 9071597f0..000000000 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ /dev/null @@ -1,113 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 12: - -The Shipping Modes and Order Priority Query counts, by ship mode, for lineitems actually received -by customers in a given year, the number of lineitems belonging to orders for which the -l_receiptdate exceeds the l_commitdate for two different specified ship modes. Only lineitems that -were actually shipped before the l_commitdate are considered. The late lineitems are partitioned -into two groups, those with priority URGENT or HIGH, and those with a priority other than URGENT or -HIGH. 
- -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -SHIP_MODE_1 = "MAIL" -SHIP_MODE_2 = "SHIP" -DATE_OF_INTEREST = "1994-01-01" - -# Load the dataframes we need - -ctx = SessionContext() - -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_orderpriority" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_shipmode", "l_commitdate", "l_shipdate", "l_receiptdate" -) - -date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() - -interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) - - -df = df_lineitem.filter(col("l_receiptdate") >= lit(date)).filter( - col("l_receiptdate") < lit(date) + lit(interval) -) - -# Note: It is not recommended to use array_has because it treats the second argument as an argument -# so if you pass it col("l_shipmode") it will pass the entire array to process which is very slow. -# Instead check the position of the entry is not null. -df = df.filter( - ~F.array_position( - F.make_array(lit(SHIP_MODE_1), lit(SHIP_MODE_2)), col("l_shipmode") - ).is_null() -) - -# Since we have only two values, it's much easier to do this as a filter where the l_shipmode -# matches either of the two values, but we want to show doing some array operations in this -# example. If you want to see this done with filters, comment out the above line and uncomment -# this one. 
-# df = df.filter((col("l_shipmode") == lit(SHIP_MODE_1)) | (col("l_shipmode") == lit(SHIP_MODE_2))) # noqa: ERA001 - - -# We need order priority, so join order df to line item -df = df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") - -# Restrict to line items we care about based on the problem statement. -df = df.filter(col("l_commitdate") < col("l_receiptdate")) - -df = df.filter(col("l_shipdate") < col("l_commitdate")) - -df = df.with_column( - "high_line_value", - F.case(col("o_orderpriority")) - .when(lit("1-URGENT"), lit(1)) - .when(lit("2-HIGH"), lit(1)) - .otherwise(lit(0)), -) - -# Aggregate the results -df = df.aggregate( - [col("l_shipmode")], - [ - F.sum(col("high_line_value")).alias("high_line_count"), - F.count(col("high_line_value")).alias("all_lines_count"), - ], -) - -# Compute the final output -df = df.select( - col("l_shipmode"), - col("high_line_count"), - (col("all_lines_count") - col("high_line_count")).alias("low_line_count"), -) - -df = df.sort(col("l_shipmode").sort()) - -df.show() diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py deleted file mode 100644 index 93f082ea3..000000000 --- a/examples/tpch/q13_customer_distribution.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 13: - -This query determines the distribution of customers by the number of orders they have made, -including customers who have no record of orders, past or present. It counts and reports how many -customers have no orders, how many have 1, 2, 3, etc. A check is made to ensure that the orders -counted do not fall into one of several special categories of orders. Special categories are -identified in the order comment column by looking for a particular pattern. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -WORD_1 = "special" -WORD_2 = "requests" - -# Load the dataframes we need - -ctx = SessionContext() - -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_custkey", "o_comment" -) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select("c_custkey") - -# Use a regex to remove special cases -df_orders = df_orders.filter( - F.regexp_match(col("o_comment"), lit(f"{WORD_1}.?*{WORD_2}")).is_null() -) - -# Since we may have customers with no orders we must do a left join -df = df_customer.join( - df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="left" -) - -# Find the number of orders for each customer -df = df.aggregate([col("c_custkey")], [F.count(col("o_custkey")).alias("c_count")]) - -# Ultimately we want to know the number of customers that have that customer count -df = df.aggregate([col("c_count")], [F.count(col("c_count")).alias("custdist")]) - -# We want to order the results by the highest number of customers per count -df = df.sort( - col("custdist").sort(ascending=False), col("c_count").sort(ascending=False) -) - 
-df.show() diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py deleted file mode 100644 index d62f76e3c..000000000 --- a/examples/tpch/q14_promotion_effect.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 14: - -The Promotion Effect Query determines what percentage of the revenue in a given year and month was -derived from promotional parts. The query considers only parts actually shipped in that month and -gives the percentage. Revenue is defined as (l_extendedprice * (1-l_discount)). - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -DATE = "1995-09-01" - -date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) - -interval_one_month = lit(pa.scalar((0, 30, 0), type=pa.month_day_nano_interval())) - -# Load the dataframes we need - -ctx = SessionContext() - -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_partkey", "l_shipdate", "l_extendedprice", "l_discount" -) -df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type") - - -# Check part type begins with PROMO -df_part = df_part.filter( - F.substring(col("p_type"), lit(0), lit(6)) == lit("PROMO") -).with_column("promo_factor", lit(1.0)) - -df_lineitem = df_lineitem.filter(col("l_shipdate") >= date_of_interest).filter( - col("l_shipdate") < date_of_interest + interval_one_month -) - -# Left join so we can sum up the promo parts different from other parts -df = df_lineitem.join( - df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="left" -) - -# Make a factor of 1.0 if it is a promotion, 0.0 otherwise -df = df.with_column("promo_factor", F.coalesce(col("promo_factor"), lit(0.0))) -df = df.with_column("revenue", col("l_extendedprice") * (lit(1.0) - col("l_discount"))) - - -# Sum up the promo and total revenue -df = df.aggregate( - [], - [ - F.sum(col("promo_factor") * col("revenue")).alias("promo_revenue"), - F.sum(col("revenue")).alias("total_revenue"), - ], -) - -# Return the percentage of revenue from promotions -df = df.select( - (lit(100.0) * col("promo_revenue") / col("total_revenue")).alias("promo_revenue") -) - -df.show() diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py deleted file mode 100644 index c321048f2..000000000 --- a/examples/tpch/q15_top_supplier.py +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) 
under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 15: - -The Top Supplier Query finds the supplier who contributed the most to the overall revenue for parts -shipped during a given quarter of a given year. In case of a tie, the query lists all suppliers -whose contribution was equal to the maximum, presented in supplier number order. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, WindowFrame, col, lit -from datafusion import functions as F -from util import get_data_path - -DATE = "1996-01-01" - -date_of_interest = lit(datetime.strptime(DATE, "%Y-%m-%d").date()) - -interval_3_months = lit(pa.scalar((0, 91, 0), type=pa.month_day_nano_interval())) - -# Load the dataframes we need - -ctx = SessionContext() - -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_suppkey", "l_shipdate", "l_extendedprice", "l_discount" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", - "s_name", - "s_address", - "s_phone", -) - -# Limit line items to the quarter of interest -df_lineitem = df_lineitem.filter(col("l_shipdate") >= date_of_interest).filter( - col("l_shipdate") < date_of_interest + interval_3_months -) - -df = df_lineitem.aggregate( - [col("l_suppkey")], - [ - F.sum(col("l_extendedprice") * (lit(1) - col("l_discount"))).alias( - "total_revenue" - ) - ], -) - -# Use a window function to find the maximum revenue across the entire dataframe -window_frame = WindowFrame("rows", None, None) -df = df.with_column( - "max_revenue", F.window("max", [col("total_revenue")], window_frame=window_frame) -) - -# Find all suppliers whose total revenue is the same as the maximum -df = df.filter(col("total_revenue") == col("max_revenue")) - -# Now that we know the supplier(s) with maximum revenue, get the rest of their information -# from the supplier table -df = df.join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") - -# Return only the columns requested -df = df.select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") - -# If we have more than one, sort by supplier number (suppkey) -df = df.sort(col("s_suppkey").sort()) - -df.show() diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py deleted file mode 
100644 index 65043ffda..000000000 --- a/examples/tpch/q16_part_supplier_relationship.py +++ /dev/null @@ -1,90 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 16: - -The Parts/Supplier Relationship Query counts the number of suppliers who can supply parts that -satisfy a particular customer's requirements. The customer is interested in parts of eight -different sizes as long as they are not of a given type, not of a given brand, and not from a -supplier who has had complaints registered at the Better Business Bureau. Results must be presented -in descending count and ascending brand, type, and size. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -BRAND = "Brand#45" -TYPE_TO_IGNORE = "MEDIUM POLISHED" -SIZES_OF_INTEREST = [49, 14, 23, 45, 19, 3, 36, 9] - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select( - "p_partkey", "p_brand", "p_type", "p_size" -) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( - "ps_suppkey", "ps_partkey" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_comment" -) - -df_unwanted_suppliers = df_supplier.filter( - ~F.regexp_match(col("s_comment"), lit("Customer.?*Complaints")).is_null() -) - -# Remove unwanted suppliers -df_partsupp = df_partsupp.join( - df_unwanted_suppliers, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="anti" -) - -# Select the parts we are interested in -df_part = df_part.filter(col("p_brand") != lit(BRAND)) -df_part = df_part.filter( - F.substring(col("p_type"), lit(0), lit(len(TYPE_TO_IGNORE) + 1)) - != lit(TYPE_TO_IGNORE) -) - -# Python conversion of integer to literal casts it to int64 but the data for -# part size is stored as an int32, so perform a cast. Then check to find if the part -# size is within the array of possible sizes by checking the position of it is not -# null. 
-p_sizes = F.make_array(*[lit(s).cast(pa.int32()) for s in SIZES_OF_INTEREST]) -df_part = df_part.filter(~F.array_position(p_sizes, col("p_size")).is_null()) - -df = df_part.join( - df_partsupp, left_on=["p_partkey"], right_on=["ps_partkey"], how="inner" -) - -df = df.select("p_brand", "p_type", "p_size", "ps_suppkey").distinct() - -df = df.aggregate( - [col("p_brand"), col("p_type"), col("p_size")], - [F.count(col("ps_suppkey")).alias("supplier_cnt")], -) - -df = df.sort(col("supplier_cnt").sort(ascending=False)) - -df.show() diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py deleted file mode 100644 index 6d76fe506..000000000 --- a/examples/tpch/q17_small_quantity_order.py +++ /dev/null @@ -1,77 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 17: - -The Small-Quantity-Order Revenue Query considers parts of a given brand and with a given container -type and determines the average lineitem quantity of such parts ordered for all orders (past and -pending) in the 7-year database. 
What would be the average yearly gross (undiscounted) loss in -revenue if orders for these parts with a quantity of less than 20% of this average were no longer -taken? - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -from datafusion import SessionContext, WindowFrame, col, lit -from datafusion import functions as F -from util import get_data_path - -BRAND = "Brand#23" -CONTAINER = "MED BOX" - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select( - "p_partkey", "p_brand", "p_container" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_partkey", "l_quantity", "l_extendedprice" -) - -# Limit to the problem statement's brand and container types -df = df_part.filter(col("p_brand") == lit(BRAND)).filter( - col("p_container") == lit(CONTAINER) -) - -# Combine data -df = df.join(df_lineitem, left_on=["p_partkey"], right_on=["l_partkey"], how="inner") - -# Find the average quantity -window_frame = WindowFrame("rows", None, None) -df = df.with_column( - "avg_quantity", - F.window( - "avg", - [col("l_quantity")], - window_frame=window_frame, - partition_by=[col("l_partkey")], - ), -) - -df = df.filter(col("l_quantity") < lit(0.2) * col("avg_quantity")) - -# Compute the total -df = df.aggregate([], [F.sum(col("l_extendedprice")).alias("total")]) - -# Divide by number of years in the problem statement to get average -df = df.select((col("total") / lit(7)).alias("avg_yearly")) - -df.show() diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py deleted file mode 100644 index 834d181c9..000000000 --- a/examples/tpch/q18_large_volume_customer.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 18: - -The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large -quantity orders. The query lists the customer name, customer key, the order key, date and total -price and the quantity for the order. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -QUANTITY = 300 - -# Load the dataframes we need - -ctx = SessionContext() - -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_custkey", "c_name" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_custkey", "o_orderdate", "o_totalprice" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_quantity", "l_extendedprice" -) - -df = df_lineitem.aggregate( - [col("l_orderkey")], [F.sum(col("l_quantity")).alias("total_quantity")] -) - -# Limit to orders in which the total quantity is above a threshold -df = df.filter(col("total_quantity") > lit(QUANTITY)) - -# We've identified the orders of interest, now join the additional data -# we are required to report on -df = df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") -df = df.join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") - -df = df.select( - "c_name", "c_custkey", "o_orderkey", "o_orderdate", "o_totalprice", "total_quantity" -) - -df = df.sort(col("o_totalprice").sort(ascending=False), col("o_orderdate").sort()) - -df.show() diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py deleted file mode 100644 index bd492aac0..000000000 --- a/examples/tpch/q19_discounted_revenue.py +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 19: - -The Discounted Revenue query finds the gross discounted revenue for all orders for three different -types of parts that were shipped by air and delivered in person. Parts are selected based on the -combination of specific brands, a list of containers, and a range of sizes. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. -""" - -import pyarrow as pa -from datafusion import SessionContext, col, lit, udf -from datafusion import functions as F -from util import get_data_path - -items_of_interest = { - "Brand#12": { - "min_quantity": 1, - "containers": ["SM CASE", "SM BOX", "SM PACK", "SM PKG"], - "max_size": 5, - }, - "Brand#23": { - "min_quantity": 10, - "containers": ["MED BAG", "MED BOX", "MED PKG", "MED PACK"], - "max_size": 10, - }, - "Brand#34": { - "min_quantity": 20, - "containers": ["LG CASE", "LG BOX", "LG PACK", "LG PKG"], - "max_size": 15, - }, -} - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select( - "p_partkey", "p_brand", "p_container", "p_size" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_partkey", - "l_quantity", - "l_shipmode", - "l_shipinstruct", - "l_extendedprice", - "l_discount", -) - -# These limitations apply to all line items, so go ahead and do them first - -df = df_lineitem.filter(col("l_shipinstruct") == lit("DELIVER IN PERSON")) - -df = df.filter( - 
(col("l_shipmode") == lit("AIR")) | (col("l_shipmode") == lit("AIR REG")) -) - -df = df.join(df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="inner") - - -# Create the user defined function (UDF) definition that does the work -def is_of_interest( - brand_arr: pa.Array, - container_arr: pa.Array, - quantity_arr: pa.Array, - size_arr: pa.Array, -) -> pa.Array: - """ - The purpose of this function is to demonstrate how a UDF works, taking as input a pyarrow Array - and generating a resultant Array. The length of the inputs should match and there should be the - same number of rows in the output. - """ - result = [] - for idx, brand_val in enumerate(brand_arr): - brand = brand_val.as_py() - if brand in items_of_interest: - values_of_interest = items_of_interest[brand] - - container_matches = ( - container_arr[idx].as_py() in values_of_interest["containers"] - ) - - quantity = quantity_arr[idx].as_py() - quantity_matches = ( - values_of_interest["min_quantity"] - <= quantity - <= values_of_interest["min_quantity"] + 10 - ) - - size = size_arr[idx].as_py() - size_matches = 1 <= size <= values_of_interest["max_size"] - - result.append(container_matches and quantity_matches and size_matches) - else: - result.append(False) - - return pa.array(result) - - -# Turn the above function into a UDF that DataFusion can understand -is_of_interest_udf = udf( - is_of_interest, - [pa.utf8(), pa.utf8(), pa.decimal128(15, 2), pa.int32()], - pa.bool_(), - "stable", -) - -# Filter results using the above UDF -df = df.filter( - is_of_interest_udf( - col("p_brand"), col("p_container"), col("l_quantity"), col("p_size") - ) -) - -df = df.aggregate( - [], - [F.sum(col("l_extendedprice") * (lit(1) - col("l_discount"))).alias("revenue")], -) - -df.show() diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py deleted file mode 100644 index a25188d31..000000000 --- a/examples/tpch/q20_potential_part_promotion.py +++ /dev/null @@ -1,101 
+0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 20: - -The Potential Part Promotion query identifies suppliers who have an excess of a given part -available; an excess is defined to be more than 50% of the parts like the given part that the -supplier shipped in a given year for a given nation. Only parts whose names share a certain naming -convention are considered. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datetime import datetime - -import pyarrow as pa -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -COLOR_OF_INTEREST = "forest" -DATE_OF_INTEREST = "1994-01-01" -NATION_OF_INTEREST = "CANADA" - -# Load the dataframes we need - -ctx = SessionContext() - -df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_name") -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_shipdate", "l_partkey", "l_suppkey", "l_quantity" -) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( - "ps_partkey", "ps_suppkey", "ps_availqty" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_address", "s_name", "s_nationkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name" -) - -date = datetime.strptime(DATE_OF_INTEREST, "%Y-%m-%d").date() - -interval = pa.scalar((0, 365, 0), type=pa.month_day_nano_interval()) - -# Filter down dataframes -df_nation = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST)) -df_part = df_part.filter( - F.substring(col("p_name"), lit(0), lit(len(COLOR_OF_INTEREST) + 1)) - == lit(COLOR_OF_INTEREST) -) - -df = df_lineitem.filter(col("l_shipdate") >= lit(date)).filter( - col("l_shipdate") < lit(date) + lit(interval) -) - -# This will filter down the line items to the parts of interest -df = df.join(df_part, left_on="l_partkey", right_on="p_partkey", how="inner") - -# Compute the total sold and limit ourselves to individual supplier/part combinations -df = df.aggregate( - [col("l_partkey"), col("l_suppkey")], [F.sum(col("l_quantity")).alias("total_sold")] -) - -df = df.join( - df_partsupp, - left_on=["l_partkey", "l_suppkey"], - right_on=["ps_partkey", "ps_suppkey"], - how="inner", -) - -# Find cases of excess quantity -df.filter(col("ps_availqty") > lit(0.5) * col("total_sold")) - -# We could do these joins 
earlier, but now limit to the nation of interest suppliers -df = df.join(df_supplier, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="inner") -df = df.join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") - -# Restrict to the requested data per the problem statement -df = df.select("s_name", "s_address").distinct() - -df = df.sort(col("s_name").sort()) - -df.show() diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py deleted file mode 100644 index 619c4406b..000000000 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ /dev/null @@ -1,118 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 21: - -The Suppliers Who Kept Orders Waiting query identifies suppliers, for a given nation, whose product -was part of a multi-supplier order (with current status of 'F') where they were the only supplier -who failed to meet the committed delivery date. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datafusion import SessionContext, col, lit -from datafusion import functions as F -from util import get_data_path - -NATION_OF_INTEREST = "SAUDI ARABIA" - -# Load the dataframes we need - -ctx = SessionContext() - -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( - "o_orderkey", "o_orderstatus" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( - "l_orderkey", "l_receiptdate", "l_commitdate", "l_suppkey" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( - "s_suppkey", "s_name", "s_nationkey" -) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( - "n_nationkey", "n_name" -) - -# Limit to suppliers in the nation of interest -df_suppliers_of_interest = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST)) - -df_suppliers_of_interest = df_suppliers_of_interest.join( - df_supplier, left_on="n_nationkey", right_on="s_nationkey", how="inner" -) - -# Find the failed orders and all their line items -df = df_orders.filter(col("o_orderstatus") == lit("F")) - -df = df_lineitem.join(df, left_on="l_orderkey", right_on="o_orderkey", how="inner") - -# Identify the line items for which the order is failed due to. -df = df.with_column( - "failed_supp", - F.case(col("l_receiptdate") > col("l_commitdate")) - .when(lit(value=True), col("l_suppkey")) - .end(), -) - -# There are other ways we could do this but the purpose of this example is to work with rows where -# an element is an array of values. In this case, we will create two columns of arrays. One will be -# an array of all of the suppliers who made up this order. That way we can filter the dataframe for -# only orders where this array is larger than one for multiple supplier orders. The second column -# is all of the suppliers who failed to make their commitment. We can filter the second column for -# arrays with size one. That combination will give us orders that had multiple suppliers where only -# one failed. 
Use distinct=True in the blow aggregation so we don't get multiple line items from the -# same supplier reported in either array. -df = df.aggregate( - [col("o_orderkey")], - [ - F.array_agg(col("l_suppkey"), distinct=True).alias("all_suppliers"), - F.array_agg(col("failed_supp"), distinct=True).alias("failed_suppliers"), - ], -) - -# Remove the null entries that will get returned by array_agg so we can test to see where we only -# have a single failed supplier in a multiple supplier order -df = df.with_column( - "failed_suppliers", F.array_remove(col("failed_suppliers"), lit(None)) -) - -# This is the check described above which will identify single failed supplier in a multiple -# supplier order. -df = df.filter(F.array_length(col("failed_suppliers")) == lit(1)).filter( - F.array_length(col("all_suppliers")) > lit(1) -) - -# Since we have an array we know is exactly one element long, we can extract that single value. -df = df.select( - col("o_orderkey"), F.array_element(col("failed_suppliers"), lit(1)).alias("suppkey") -) - -# Join to the supplier of interest list for the nation of interest -df = df.join( - df_suppliers_of_interest, left_on=["suppkey"], right_on=["s_suppkey"], how="inner" -) - -# Count how many orders that supplier is the only failed supplier for -df = df.aggregate([col("s_name")], [F.count(col("o_orderkey")).alias("numwait")]) - -# Return in descending order -df = df.sort(col("numwait").sort(ascending=False), col("s_name").sort()) - -df = df.limit(100) - -df.show() diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py deleted file mode 100644 index c4d115b74..000000000 --- a/examples/tpch/q22_global_sales_opportunity.py +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H Problem Statement Query 22: - -This query counts how many customers within a specific range of country codes have not placed -orders for 7 years but who have a greater than average “positive” account balance. It also reflects -the magnitude of that balance. Country code is defined as the first two characters of c_phone. - -The above problem statement text is copyrighted by the Transaction Processing Performance Council -as part of their TPC Benchmark H Specification revision 2.18.0. 
-""" - -from datafusion import SessionContext, WindowFrame, col, lit -from datafusion import functions as F -from util import get_data_path - -NATION_CODES = [13, 31, 23, 29, 30, 18, 17] - -# Load the dataframes we need - -ctx = SessionContext() - -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( - "c_phone", "c_acctbal", "c_custkey" -) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select("o_custkey") - -# The nation code is a two digit number, but we need to convert it to a string literal -nation_codes = F.make_array(*[lit(str(n)) for n in NATION_CODES]) - -# Use the substring operation to extract the first two characters of the phone number -df = df_customer.with_column("cntrycode", F.substring(col("c_phone"), lit(0), lit(3))) - -# Limit our search to customers with some balance and in the country code above -df = df.filter(col("c_acctbal") > lit(0.0)) -df = df.filter(~F.array_position(nation_codes, col("cntrycode")).is_null()) - -# Compute the average balance. By default, the window frame is from unbounded preceding to the -# current row. We want our frame to cover the entire data frame. 
-window_frame = WindowFrame("rows", None, None) -df = df.with_column( - "avg_balance", F.window("avg", [col("c_acctbal")], window_frame=window_frame) -) - -df.show() -# Limit results to customers with above average balance -df = df.filter(col("c_acctbal") > col("avg_balance")) - -# Limit results to customers with no orders -df = df.join(df_orders, left_on="c_custkey", right_on="o_custkey", how="anti") - -# Count up the customers and the balances -df = df.aggregate( - [col("cntrycode")], - [ - F.count(col("c_custkey")).alias("numcust"), - F.sum(col("c_acctbal")).alias("totacctbal"), - ], -) - -df = df.sort(col("cntrycode").sort()) - -df.show() diff --git a/examples/tpch/util.py b/examples/tpch/util.py deleted file mode 100644 index ec53bcd15..000000000 --- a/examples/tpch/util.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Common utilities for running TPC-H examples. 
-""" - -from pathlib import Path - - -def get_data_path(filename: str) -> Path: - path = Path(__file__).resolve().parent - - return path / "data" / filename - - -def get_answer_file(answer_file: str) -> Path: - path = Path(__file__).resolve().parent - - return path / "../../benchmarks/tpch/data/answers" / f"{answer_file}.out" diff --git a/genindex.html b/genindex.html new file mode 100644 index 000000000..277d439ea --- /dev/null +++ b/genindex.html @@ -0,0 +1,4006 @@ + + + + + + + Index — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ +
+ + + + + + +
+ +
+ + +

Index

+ +
+ _ + | A + | B + | C + | D + | E + | F + | G + | H + | I + | J + | K + | L + | M + | N + | O + | P + | Q + | R + | S + | T + | U + | V + | W + | Z + +
+

_

+ + + +
+ +

A

+ + + +
+ +

B

+ + + +
+ +

C

+ + + +
+ +

D

+ + + +
+ +

E

+ + + +
+ +

F

+ + + +
+ +

G

+ + + +
+ +

H

+ + + +
+ +

I

+ + + +
+ +

J

+ + + +
+ +

K

+ + +
+ +

L

+ + + +
+ +

M

+ + + +
+ +

N

+ + + +
+ +

O

+ + + +
+ +

P

+ + + +
+ +

Q

+ + +
+ +

R

+ + + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + + +
+ +

V

+ + + +
+ +

W

+ + + +
+ +

Z

+ + +
+ + + +
+ + + +
+
+ +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 000000000..cfaa53f66 --- /dev/null +++ b/index.html @@ -0,0 +1,561 @@ + + + + + + + + DataFusion in Python — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

DataFusion in Python

+

This is a Python library that binds to Apache Arrow in-memory query engine DataFusion.

+

Like pyspark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python.

+

It also allows you to use UDFs and UDAFs for complex operations.

+

The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations.

+

Its query engine, DataFusion, is written in Rust, which makes strong assumptions about thread safety and lack of memory leaks.

+

Technically, zero-copy is achieved via the Arrow C Data Interface.

+
+

Install

+
pip install datafusion
+
+
+
+
+

Example

+
In [1]: from datafusion import SessionContext
+
+In [2]: ctx = SessionContext()
+
+In [3]: df = ctx.read_csv("pokemon.csv")
+
+In [4]: df.show()
+DataFrame()
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| #  | Name                      | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| 1  | Bulbasaur                 | Grass  | Poison | 318   | 45 | 49     | 49      | 65      | 65      | 45    | 1          | false     |
+| 2  | Ivysaur                   | Grass  | Poison | 405   | 60 | 62     | 63      | 80      | 80      | 60    | 1          | false     |
+| 3  | Venusaur                  | Grass  | Poison | 525   | 80 | 82     | 83      | 100     | 100     | 80    | 1          | false     |
+| 3  | VenusaurMega Venusaur     | Grass  | Poison | 625   | 80 | 100    | 123     | 122     | 120     | 80    | 1          | false     |
+| 4  | Charmander                | Fire   |        | 309   | 39 | 52     | 43      | 60      | 50      | 65    | 1          | false     |
+| 5  | Charmeleon                | Fire   |        | 405   | 58 | 64     | 58      | 80      | 65      | 80    | 1          | false     |
+| 6  | Charizard                 | Fire   | Flying | 534   | 78 | 84     | 78      | 109     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard X | Fire   | Dragon | 634   | 78 | 130    | 111     | 130     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard Y | Fire   | Flying | 634   | 78 | 104    | 78      | 159     | 115     | 100   | 1          | false     |
+| 7  | Squirtle                  | Water  |        | 314   | 44 | 48     | 65      | 50      | 64      | 43    | 1          | false     |
+| 8  | Wartortle                 | Water  |        | 405   | 59 | 63     | 80      | 65      | 80      | 58    | 1          | false     |
+| 9  | Blastoise                 | Water  |        | 530   | 79 | 83     | 100     | 85      | 105     | 78    | 1          | false     |
+| 9  | BlastoiseMega Blastoise   | Water  |        | 630   | 79 | 103    | 120     | 135     | 115     | 78    | 1          | false     |
+| 10 | Caterpie                  | Bug    |        | 195   | 45 | 30     | 35      | 20      | 20      | 45    | 1          | false     |
+| 11 | Metapod                   | Bug    |        | 205   | 50 | 20     | 55      | 25      | 25      | 30    | 1          | false     |
+| 12 | Butterfree                | Bug    | Flying | 395   | 60 | 45     | 50      | 90      | 80      | 70    | 1          | false     |
+| 13 | Weedle                    | Bug    | Poison | 195   | 40 | 35     | 30      | 20      | 20      | 50    | 1          | false     |
+| 14 | Kakuna                    | Bug    | Poison | 205   | 45 | 25     | 50      | 25      | 25      | 35    | 1          | false     |
+| 15 | Beedrill                  | Bug    | Poison | 395   | 65 | 90     | 40      | 45      | 80      | 75    | 1          | false     |
+| 15 | BeedrillMega Beedrill     | Bug    | Poison | 495   | 65 | 150    | 40      | 15      | 80      | 145   | 1          | false     |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+
+
+ +
+
+
+
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/objects.inv b/objects.inv new file mode 100644 index 000000000..c8c4788a3 Binary files /dev/null and b/objects.inv differ diff --git a/parquet b/parquet deleted file mode 160000 index e13af117d..000000000 --- a/parquet +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e13af117de7c4f0a4d9908ae3827b3ab119868f3 diff --git a/py-modindex.html b/py-modindex.html new file mode 100644 index 000000000..834803f78 --- /dev/null +++ b/py-modindex.html @@ -0,0 +1,572 @@ + + + + + + + Python Module Index — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ +
+ + + + + + +
+ +
+ + +

Python Module Index

+ +
+ d +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
+ d
+ datafusion +
    + datafusion.catalog +
    + datafusion.context +
    + datafusion.dataframe +
    + datafusion.dataframe_formatter +
    + datafusion.expr +
    + datafusion.functions +
    + datafusion.html_formatter +
    + datafusion.input +
    + datafusion.input.base +
    + datafusion.input.location +
    + datafusion.io +
    + datafusion.object_store +
    + datafusion.options +
    + datafusion.plan +
    + datafusion.record_batch +
    + datafusion.substrait +
    + datafusion.unparser +
    + datafusion.user_defined +
+ + +
+ + + +
+
+ +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index b238e049e..000000000 --- a/pyproject.toml +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[build-system] -requires = ["maturin>=1.8.1"] -build-backend = "maturin" - -[project] -name = "datafusion" -description = "Build and run queries against data" -readme = "README.md" -license = { file = "LICENSE.txt" } -requires-python = ">=3.10" -keywords = ["dataframe", "datafusion", "query-engine", "rust"] -classifiers = [ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "License :: OSI Approved", - "Operating System :: MacOS", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Programming Language :: Python", - "Programming Language :: Rust", -] -dependencies = [ - "pyarrow>=16.0.0;python_version<'3.14'", - 
"pyarrow>=22.0.0;python_version>='3.14'", - "typing-extensions;python_version<'3.13'", -] -dynamic = ["version"] - -[project.urls] -homepage = "https://datafusion.apache.org/python" -documentation = "https://datafusion.apache.org/python" -repository = "https://github.com/apache/datafusion-python" - -[tool.isort] -profile = "black" - -[tool.maturin] -python-source = "python" -module-name = "datafusion._internal" -include = [{ path = "Cargo.lock", format = "sdist" }] -exclude = [".asf.yaml", ".github/**", "ci/**"] -# Require Cargo.lock is up to date -locked = true -features = ["substrait"] - -[tool.pytest.ini_options] -asyncio_mode = "auto" -asyncio_default_fixture_loop_scope = "function" -addopts = "--doctest-modules" -doctest_optionflags = ["NORMALIZE_WHITESPACE", "ELLIPSIS"] -testpaths = ["python/tests", "python/datafusion"] - -# Enable docstring linting using the google style guide -[tool.ruff.lint] -select = ["ALL"] -ignore = [ - "A001", # Allow using words like min as variable names - "A002", # Allow using words like filter as variable names - "A005", # Allow module named io - "ANN401", # Allow Any for wrapper classes - "COM812", # Recommended to ignore these rules when using with ruff-format - "FBT001", # Allow boolean positional args - "FBT002", # Allow boolean positional args - "FIX002", # Allow TODO lines - consider removing at some point - "ISC001", # Recommended to ignore these rules when using with ruff-format - "N812", # Allow importing functions as `F` - "PD901", # Allow variable name df - "PLR0913", # Allow many arguments in function definition - "SLF001", # Allow accessing private members - "TD002", # Do not require author names in TODO statements - "TD003", # Allow TODO lines -] - -[tool.ruff.lint.pydocstyle] -convention = "google" - -[tool.ruff.lint.pycodestyle] -max-doc-length = 88 - -[tool.ruff.lint.flake8-boolean-trap] -extend-allowed-calls = ["datafusion.lit", "lit"] - -# Disable docstring checking for these directories 
-[tool.ruff.lint.per-file-ignores] -"python/tests/*" = [ - "ANN", - "ARG", - "BLE001", - "D", - "PD", - "PLC0415", - "PLR0913", - "PLR2004", - "PT004", - "PT011", - "RUF015", - "S101", - "S608", - "SLF", -] -"examples/*" = [ - "ANN001", - "ANN202", - "D", - "DTZ007", - "E501", - "INP001", - "PLR2004", - "RUF015", - "S101", - "T201", - "W505", -] -"dev/*" = [ - "ANN001", - "C", - "D", - "E", - "ERA001", - "EXE", - "N817", - "PLR", - "S", - "SIM", - "T", - "UP", -] -"benchmarks/*" = [ - "ANN001", - "BLE", - "D", - "E", - "ERA001", - "EXE", - "F", - "FURB", - "INP001", - "PLR", - "S", - "SIM", - "T", - "TD", - "TRY", - "UP", -] -"docs/*" = ["D"] -"docs/source/conf.py" = ["ANN001", "ERA001", "INP001"] - -[tool.codespell] -skip = ["./python/tests/test_functions.py", "./target", "uv.lock"] -count = true -ignore-words-list = ["IST", "ans"] - -[dependency-groups] -dev = [ - "arro3-core==0.6.5", - "codespell==2.4.1", - "maturin>=1.8.1", - "nanoarrow==0.8.0", - "numpy>1.25.0;python_version<'3.14'", - "numpy>=2.3.2;python_version>='3.14'", - "pre-commit>=4.3.0", - "pyarrow>=19.0.0", - "pygithub==2.5.0", - "pytest-asyncio>=0.23.3", - "pytest>=7.4.4", - "pyyaml>=6.0.3", - "ruff>=0.9.1", - "toml>=0.10.2", -] -docs = [ - "ipython>=8.12.3", - "jinja2>=3.1.5", - "myst-parser>=3.0.1", - "pandas>=2.0.3", - "pickleshare>=0.7.5", - "pydata-sphinx-theme==0.8.0", - "setuptools>=75.3.0", - "sphinx-autoapi>=3.4.0", - "sphinx>=7.1.2", -] diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py deleted file mode 100644 index 2e6f81166..000000000 --- a/python/datafusion/__init__.py +++ /dev/null @@ -1,160 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DataFusion python package. - -This is a Python library that binds to Apache Arrow in-memory query engine DataFusion. -See https://datafusion.apache.org/python for more information. -""" - -from __future__ import annotations - -from typing import Any - -try: - import importlib.metadata as importlib_metadata -except ImportError: - import importlib_metadata # type: ignore[import] - -# Public submodules -from . import functions, object_store, substrait, unparser - -# The following imports are okay to remain as opaque to the user. 
-from ._internal import Config -from .catalog import Catalog, Database, Table -from .col import col, column -from .common import DFSchema -from .context import ( - RuntimeEnvBuilder, - SessionConfig, - SessionContext, - SQLOptions, -) -from .dataframe import ( - DataFrame, - DataFrameWriteOptions, - InsertOp, - ParquetColumnOptions, - ParquetWriterOptions, -) -from .dataframe_formatter import configure_formatter -from .expr import Expr, WindowFrame -from .io import read_avro, read_csv, read_json, read_parquet -from .options import CsvReadOptions -from .plan import ExecutionPlan, LogicalPlan -from .record_batch import RecordBatch, RecordBatchStream -from .user_defined import ( - Accumulator, - AggregateUDF, - ScalarUDF, - TableFunction, - WindowUDF, - udaf, - udf, - udtf, - udwf, -) - -__version__ = importlib_metadata.version(__name__) - -__all__ = [ - "Accumulator", - "AggregateUDF", - "Catalog", - "Config", - "CsvReadOptions", - "DFSchema", - "DataFrame", - "DataFrameWriteOptions", - "Database", - "ExecutionPlan", - "Expr", - "InsertOp", - "LogicalPlan", - "ParquetColumnOptions", - "ParquetWriterOptions", - "RecordBatch", - "RecordBatchStream", - "RuntimeEnvBuilder", - "SQLOptions", - "ScalarUDF", - "SessionConfig", - "SessionContext", - "Table", - "TableFunction", - "WindowFrame", - "WindowUDF", - "catalog", - "col", - "column", - "common", - "configure_formatter", - "expr", - "functions", - "lit", - "literal", - "object_store", - "options", - "read_avro", - "read_csv", - "read_json", - "read_parquet", - "substrait", - "udaf", - "udf", - "udtf", - "udwf", - "unparser", -] - - -def literal(value: Any) -> Expr: - """Create a literal expression.""" - return Expr.literal(value) - - -def string_literal(value: str) -> Expr: - """Create a UTF8 literal expression. - - It differs from `literal` which creates a UTF8view literal. 
- """ - return Expr.string_literal(value) - - -def str_lit(value: str) -> Expr: - """Alias for `string_literal`.""" - return string_literal(value) - - -def lit(value: Any) -> Expr: - """Create a literal expression.""" - return Expr.literal(value) - - -def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: - """Creates a new expression representing a scalar value with metadata. - - Args: - value: A valid PyArrow scalar value or easily castable to one. - metadata: Metadata to attach to the expression. - """ - return Expr.literal_with_metadata(value, metadata) - - -def lit_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: - """Alias for literal_with_metadata.""" - return literal_with_metadata(value, metadata) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py deleted file mode 100644 index bc43cf349..000000000 --- a/python/datafusion/catalog.py +++ /dev/null @@ -1,371 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Data catalog providers.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Protocol - -import datafusion._internal as df_internal - -if TYPE_CHECKING: - import pyarrow as pa - - from datafusion import DataFrame, SessionContext - from datafusion.context import TableProviderExportable - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - - -__all__ = [ - "Catalog", - "CatalogList", - "CatalogProvider", - "CatalogProviderList", - "Schema", - "SchemaProvider", - "Table", -] - - -class CatalogList: - """DataFusion data catalog list.""" - - def __init__(self, catalog_list: df_internal.catalog.RawCatalogList) -> None: - """This constructor is not typically called by the end user.""" - self.catalog_list = catalog_list - - def __repr__(self) -> str: - """Print a string representation of the catalog list.""" - return self.catalog_list.__repr__() - - def names(self) -> set[str]: - """This is an alias for `catalog_names`.""" - return self.catalog_names() - - def catalog_names(self) -> set[str]: - """Returns the list of schemas in this catalog.""" - return self.catalog_list.catalog_names() - - @staticmethod - def memory_catalog(ctx: SessionContext | None = None) -> CatalogList: - """Create an in-memory catalog provider list.""" - catalog_list = df_internal.catalog.RawCatalogList.memory_catalog(ctx) - return CatalogList(catalog_list) - - def catalog(self, name: str = "datafusion") -> Catalog: - """Returns the catalog with the given ``name`` from this catalog.""" - catalog = self.catalog_list.catalog(name) - - return ( - Catalog(catalog) - if isinstance(catalog, df_internal.catalog.RawCatalog) - else catalog - ) - - def register_catalog( - self, - name: str, - catalog: Catalog | CatalogProvider | CatalogProviderExportable, - ) -> Catalog | None: - """Register a catalog with this catalog list.""" - if isinstance(catalog, 
Catalog): - return self.catalog_list.register_catalog(name, catalog.catalog) - return self.catalog_list.register_catalog(name, catalog) - - -class Catalog: - """DataFusion data catalog.""" - - def __init__(self, catalog: df_internal.catalog.RawCatalog) -> None: - """This constructor is not typically called by the end user.""" - self.catalog = catalog - - def __repr__(self) -> str: - """Print a string representation of the catalog.""" - return self.catalog.__repr__() - - def names(self) -> set[str]: - """This is an alias for `schema_names`.""" - return self.schema_names() - - def schema_names(self) -> set[str]: - """Returns the list of schemas in this catalog.""" - return self.catalog.schema_names() - - @staticmethod - def memory_catalog(ctx: SessionContext | None = None) -> Catalog: - """Create an in-memory catalog provider.""" - catalog = df_internal.catalog.RawCatalog.memory_catalog(ctx) - return Catalog(catalog) - - def schema(self, name: str = "public") -> Schema: - """Returns the database with the given ``name`` from this catalog.""" - schema = self.catalog.schema(name) - - return ( - Schema(schema) - if isinstance(schema, df_internal.catalog.RawSchema) - else schema - ) - - @deprecated("Use `schema` instead.") - def database(self, name: str = "public") -> Schema: - """Returns the database with the given ``name`` from this catalog.""" - return self.schema(name) - - def register_schema( - self, - name: str, - schema: Schema | SchemaProvider | SchemaProviderExportable, - ) -> Schema | None: - """Register a schema with this catalog.""" - if isinstance(schema, Schema): - return self.catalog.register_schema(name, schema._raw_schema) - return self.catalog.register_schema(name, schema) - - def deregister_schema(self, name: str, cascade: bool = True) -> Schema | None: - """Deregister a schema from this catalog.""" - return self.catalog.deregister_schema(name, cascade) - - -class Schema: - """DataFusion Schema.""" - - def __init__(self, schema: 
df_internal.catalog.RawSchema) -> None: - """This constructor is not typically called by the end user.""" - self._raw_schema = schema - - def __repr__(self) -> str: - """Print a string representation of the schema.""" - return self._raw_schema.__repr__() - - @staticmethod - def memory_schema(ctx: SessionContext | None = None) -> Schema: - """Create an in-memory schema provider.""" - schema = df_internal.catalog.RawSchema.memory_schema(ctx) - return Schema(schema) - - def names(self) -> set[str]: - """This is an alias for `table_names`.""" - return self.table_names() - - def table_names(self) -> set[str]: - """Returns the list of all tables in this schema.""" - return self._raw_schema.table_names - - def table(self, name: str) -> Table: - """Return the table with the given ``name`` from this schema.""" - return Table(self._raw_schema.table(name)) - - def register_table( - self, - name: str, - table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset, - ) -> None: - """Register a table in this schema.""" - return self._raw_schema.register_table(name, table) - - def deregister_table(self, name: str) -> None: - """Deregister a table provider from this schema.""" - return self._raw_schema.deregister_table(name) - - def table_exist(self, name: str) -> bool: - """Determines if a table exists in this schema.""" - return self._raw_schema.table_exist(name) - - -@deprecated("Use `Schema` instead.") -class Database(Schema): - """See `Schema`.""" - - -class Table: - """A DataFusion table. 
- - Internally we currently support the following types of tables: - - - Tables created using built-in DataFusion methods, such as - reading from CSV or Parquet - - pyarrow datasets - - DataFusion DataFrames, which will be converted into a view - - Externally provided tables implemented with the FFI PyCapsule - interface (advanced) - """ - - __slots__ = ("_inner",) - - def __init__( - self, - table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset, - ctx: SessionContext | None = None, - ) -> None: - """Constructor.""" - self._inner = df_internal.catalog.RawTable(table, ctx) - - def __repr__(self) -> str: - """Print a string representation of the table.""" - return repr(self._inner) - - @staticmethod - @deprecated("Use Table() constructor instead.") - def from_dataset(dataset: pa.dataset.Dataset) -> Table: - """Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`.""" - return Table(dataset) - - @property - def schema(self) -> pa.Schema: - """Returns the schema associated with this table.""" - return self._inner.schema - - @property - def kind(self) -> str: - """Returns the kind of table.""" - return self._inner.kind - - -class CatalogProviderList(ABC): - """Abstract class for defining a Python based Catalog Provider List.""" - - @abstractmethod - def catalog_names(self) -> set[str]: - """Set of the names of all catalogs in this catalog list.""" - ... - - @abstractmethod - def catalog( - self, name: str - ) -> CatalogProviderExportable | CatalogProvider | Catalog | None: - """Retrieve a specific catalog from this catalog list.""" - ... - - def register_catalog( # noqa: B027 - self, name: str, catalog: CatalogProviderExportable | CatalogProvider | Catalog - ) -> None: - """Add a catalog to this catalog list. - - This method is optional. If your catalog provides a fixed list of catalogs, you - do not need to implement this method. 
- """ - - -class CatalogProviderListExportable(Protocol): - """Type hint for object that has __datafusion_catalog_provider_list__ PyCapsule. - - https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProviderList.html - """ - - def __datafusion_catalog_provider_list__(self, session: Any) -> object: ... - - -class CatalogProvider(ABC): - """Abstract class for defining a Python based Catalog Provider.""" - - @abstractmethod - def schema_names(self) -> set[str]: - """Set of the names of all schemas in this catalog.""" - ... - - @abstractmethod - def schema(self, name: str) -> Schema | None: - """Retrieve a specific schema from this catalog.""" - ... - - def register_schema( # noqa: B027 - self, name: str, schema: SchemaProviderExportable | SchemaProvider | Schema - ) -> None: - """Add a schema to this catalog. - - This method is optional. If your catalog provides a fixed list of schemas, you - do not need to implement this method. - """ - - def deregister_schema(self, name: str, cascade: bool) -> None: # noqa: B027 - """Remove a schema from this catalog. - - This method is optional. If your catalog provides a fixed list of schemas, you - do not need to implement this method. - - Args: - name: The name of the schema to remove. - cascade: If true, deregister the tables within the schema. - """ - - -class CatalogProviderExportable(Protocol): - """Type hint for object that has __datafusion_catalog_provider__ PyCapsule. - - https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html - """ - - def __datafusion_catalog_provider__(self, session: Any) -> object: ... - - -class SchemaProvider(ABC): - """Abstract class for defining a Python based Schema Provider.""" - - def owner_name(self) -> str | None: - """Returns the owner of the schema. - - This is an optional method. The default return is None. - """ - return None - - @abstractmethod - def table_names(self) -> set[str]: - """Set of the names of all tables in this schema.""" - ... 
- - @abstractmethod - def table(self, name: str) -> Table | None: - """Retrieve a specific table from this schema.""" - ... - - def register_table( # noqa: B027 - self, name: str, table: Table | TableProviderExportable | Any - ) -> None: - """Add a table to this schema. - - This method is optional. If your schema provides a fixed list of tables, you do - not need to implement this method. - """ - - def deregister_table(self, name: str, cascade: bool) -> None: # noqa: B027 - """Remove a table from this schema. - - This method is optional. If your schema provides a fixed list of tables, you do - not need to implement this method. - """ - - @abstractmethod - def table_exist(self, name: str) -> bool: - """Returns true if the table exists in this schema.""" - ... - - -class SchemaProviderExportable(Protocol): - """Type hint for object that has __datafusion_schema_provider__ PyCapsule. - - https://docs.rs/datafusion/latest/datafusion/catalog/trait.SchemaProvider.html - """ - - def __datafusion_schema_provider__(self, session: Any) -> object: ... diff --git a/python/datafusion/col.py b/python/datafusion/col.py deleted file mode 100644 index 1141dc092..000000000 --- a/python/datafusion/col.py +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Col class.""" - -from datafusion.expr import Expr - - -class Col: - """Create a column expression. - - This helper class allows an extra syntax of creating columns using the __getattr__ - method. - """ - - def __call__(self, value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - def __getattr__(self, value: str) -> Expr: - """Create a column using attribute syntax.""" - # For autocomplete to work with IPython - if value.startswith("__wrapped__"): - return getattr(type(self), value) - - return Expr.column(value) - - -col: Col = Col() -column: Col = Col() -__all__ = ["col", "column"] diff --git a/python/datafusion/common.py b/python/datafusion/common.py deleted file mode 100644 index c689a816d..000000000 --- a/python/datafusion/common.py +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Common data types used throughout the DataFusion project.""" - -from enum import Enum - -from ._internal import common as common_internal - -# TODO: these should all have proper wrapper classes - -DFSchema = common_internal.DFSchema -DataType = common_internal.DataType -DataTypeMap = common_internal.DataTypeMap -PythonType = common_internal.PythonType -RexType = common_internal.RexType -SqlFunction = common_internal.SqlFunction -SqlSchema = common_internal.SqlSchema -SqlStatistics = common_internal.SqlStatistics -SqlTable = common_internal.SqlTable -SqlType = common_internal.SqlType -SqlView = common_internal.SqlView -TableType = common_internal.TableType -TableSource = common_internal.TableSource -Constraints = common_internal.Constraints - -__all__ = [ - "Constraints", - "DFSchema", - "DataType", - "DataTypeMap", - "NullTreatment", - "PythonType", - "RexType", - "SqlFunction", - "SqlSchema", - "SqlStatistics", - "SqlTable", - "SqlType", - "SqlView", - "TableSource", - "TableType", -] - - -class NullTreatment(Enum): - """Describe how null values are to be treated by functions. - - This is used primarily by aggregate and window functions. It can be set on - these functions using the builder approach described in - ref:`_window_functions` and ref:`_aggregation` in the online documentation. - - """ - - RESPECT_NULLS = common_internal.NullTreatment.RESPECT_NULLS - IGNORE_NULLS = common_internal.NullTreatment.IGNORE_NULLS diff --git a/python/datafusion/context.py b/python/datafusion/context.py deleted file mode 100644 index 0d8259774..000000000 --- a/python/datafusion/context.py +++ /dev/null @@ -1,1386 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Session Context and it's associated configuration.""" - -from __future__ import annotations - -import uuid -import warnings -from typing import TYPE_CHECKING, Any, Protocol - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - - -import pyarrow as pa - -from datafusion.catalog import ( - Catalog, - CatalogList, - CatalogProviderExportable, - CatalogProviderList, - CatalogProviderListExportable, -) -from datafusion.dataframe import DataFrame -from datafusion.expr import sort_list_to_raw_sort_list -from datafusion.options import ( - DEFAULT_MAX_INFER_SCHEMA, - CsvReadOptions, - _convert_table_partition_cols, -) -from datafusion.record_batch import RecordBatchStream - -from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal -from ._internal import SessionConfig as SessionConfigInternal -from ._internal import SessionContext as SessionContextInternal -from ._internal import SQLOptions as SQLOptionsInternal -from ._internal import expr as expr_internal - -if TYPE_CHECKING: - import pathlib - from collections.abc import Sequence - - import pandas as pd - import polars as pl # type: ignore[import] - - from datafusion.catalog import CatalogProvider, Table - from datafusion.expr import SortKey - from datafusion.plan import ExecutionPlan, LogicalPlan - from datafusion.user_defined import ( - AggregateUDF, 
- ScalarUDF, - TableFunction, - WindowUDF, - ) - - -class ArrowStreamExportable(Protocol): - """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface. - - https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html - """ - - def __arrow_c_stream__( # noqa: D105 - self, requested_schema: object | None = None - ) -> object: ... - - -class ArrowArrayExportable(Protocol): - """Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface. - - https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html - """ - - def __arrow_c_array__( # noqa: D105 - self, requested_schema: object | None = None - ) -> tuple[object, object]: ... - - -class TableProviderExportable(Protocol): - """Type hint for object that has __datafusion_table_provider__ PyCapsule. - - https://datafusion.apache.org/python/user-guide/io/table_provider.html - """ - - def __datafusion_table_provider__(self, session: Any) -> object: ... # noqa: D105 - - -class SessionConfig: - """Session configuration options.""" - - def __init__(self, config_options: dict[str, str] | None = None) -> None: - """Create a new :py:class:`SessionConfig` with the given configuration options. - - Args: - config_options: Configuration options. - """ - self.config_internal = SessionConfigInternal(config_options) - - def with_create_default_catalog_and_schema( - self, enabled: bool = True - ) -> SessionConfig: - """Control if the default catalog and schema will be automatically created. - - Args: - enabled: Whether the default catalog and schema will be - automatically created. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = ( - self.config_internal.with_create_default_catalog_and_schema(enabled) - ) - return self - - def with_default_catalog_and_schema( - self, catalog: str, schema: str - ) -> SessionConfig: - """Select a name for the default catalog and schema. - - Args: - catalog: Catalog name. 
- schema: Schema name. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_default_catalog_and_schema( - catalog, schema - ) - return self - - def with_information_schema(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the inclusion of ``information_schema`` virtual tables. - - Args: - enabled: Whether to include ``information_schema`` virtual tables. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_information_schema(enabled) - return self - - def with_batch_size(self, batch_size: int) -> SessionConfig: - """Customize batch size. - - Args: - batch_size: Batch size. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_batch_size(batch_size) - return self - - def with_target_partitions(self, target_partitions: int) -> SessionConfig: - """Customize the number of target partitions for query execution. - - Increasing partitions can increase concurrency. - - Args: - target_partitions: Number of target partitions. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_target_partitions( - target_partitions - ) - return self - - def with_repartition_aggregations(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of repartitioning for aggregations. - - Enabling this improves parallelism. - - Args: - enabled: Whether to use repartitioning for aggregations. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. 
- """ - self.config_internal = self.config_internal.with_repartition_aggregations( - enabled - ) - return self - - def with_repartition_joins(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of repartitioning for joins to improve parallelism. - - Args: - enabled: Whether to use repartitioning for joins. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_repartition_joins(enabled) - return self - - def with_repartition_windows(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of repartitioning for window functions. - - This may improve parallelism. - - Args: - enabled: Whether to use repartitioning for window functions. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_repartition_windows(enabled) - return self - - def with_repartition_sorts(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of repartitioning for window functions. - - This may improve parallelism. - - Args: - enabled: Whether to use repartitioning for window functions. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_repartition_sorts(enabled) - return self - - def with_repartition_file_scans(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of repartitioning for file scans. - - Args: - enabled: Whether to use repartitioning for file scans. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_repartition_file_scans(enabled) - return self - - def with_repartition_file_min_size(self, size: int) -> SessionConfig: - """Set minimum file range size for repartitioning scans. - - Args: - size: Minimum file range size. 
- - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_repartition_file_min_size(size) - return self - - def with_parquet_pruning(self, enabled: bool = True) -> SessionConfig: - """Enable or disable the use of pruning predicate for parquet readers. - - Pruning predicates will enable the reader to skip row groups. - - Args: - enabled: Whether to use pruning predicate for parquet readers. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.with_parquet_pruning(enabled) - return self - - def set(self, key: str, value: str) -> SessionConfig: - """Set a configuration option. - - Args: - key: Option key. - value: Option value. - - Returns: - A new :py:class:`SessionConfig` object with the updated setting. - """ - self.config_internal = self.config_internal.set(key, value) - return self - - -class RuntimeEnvBuilder: - """Runtime configuration options.""" - - def __init__(self) -> None: - """Create a new :py:class:`RuntimeEnvBuilder` with default values.""" - self.config_internal = RuntimeEnvBuilderInternal() - - def with_disk_manager_disabled(self) -> RuntimeEnvBuilder: - """Disable the disk manager, attempts to create temporary files will error. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - """ - self.config_internal = self.config_internal.with_disk_manager_disabled() - return self - - def with_disk_manager_os(self) -> RuntimeEnvBuilder: - """Use the operating system's temporary directory for disk manager. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - """ - self.config_internal = self.config_internal.with_disk_manager_os() - return self - - def with_disk_manager_specified( - self, *paths: str | pathlib.Path - ) -> RuntimeEnvBuilder: - """Use the specified paths for the disk manager's temporary files. 
- - Args: - paths: Paths to use for the disk manager's temporary files. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - """ - paths_list = [str(p) for p in paths] - self.config_internal = self.config_internal.with_disk_manager_specified( - paths_list - ) - return self - - def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder: - """Use an unbounded memory pool. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - """ - self.config_internal = self.config_internal.with_unbounded_memory_pool() - return self - - def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder: - """Use a fair spill pool with the specified size. - - This pool works best when you know beforehand the query has multiple spillable - operators that will likely all need to spill. Sometimes it will cause spills - even when there was sufficient memory (reserved for other operators) to avoid - doing so:: - - ┌───────────────────────z──────────────────────z───────────────┐ - │ z z │ - │ z z │ - │ Spillable z Unspillable z Free │ - │ Memory z Memory z Memory │ - │ z z │ - │ z z │ - └───────────────────────z──────────────────────z───────────────┘ - - Args: - size: Size of the memory pool in bytes. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - - Examples usage:: - - config = RuntimeEnvBuilder().with_fair_spill_pool(1024) - """ - self.config_internal = self.config_internal.with_fair_spill_pool(size) - return self - - def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder: - """Use a greedy memory pool with the specified size. - - This pool works well for queries that do not need to spill or have a single - spillable operator. See :py:func:`with_fair_spill_pool` if there are - multiple spillable operators that all will spill. - - Args: - size: Size of the memory pool in bytes. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. 
- - Example usage:: - - config = RuntimeEnvBuilder().with_greedy_memory_pool(1024) - """ - self.config_internal = self.config_internal.with_greedy_memory_pool(size) - return self - - def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder: - """Use the specified path to create any needed temporary files. - - Args: - path: Path to use for temporary files. - - Returns: - A new :py:class:`RuntimeEnvBuilder` object with the updated setting. - - Example usage:: - - config = RuntimeEnvBuilder().with_temp_file_path("/tmp") - """ - self.config_internal = self.config_internal.with_temp_file_path(str(path)) - return self - - -@deprecated("Use `RuntimeEnvBuilder` instead.") -class RuntimeConfig(RuntimeEnvBuilder): - """See `RuntimeEnvBuilder`.""" - - -class SQLOptions: - """Options to be used when performing SQL queries.""" - - def __init__(self) -> None: - """Create a new :py:class:`SQLOptions` with default values. - - The default values are: - - DDL commands are allowed - - DML commands are allowed - - Statements are allowed - """ - self.options_internal = SQLOptionsInternal() - - def with_allow_ddl(self, allow: bool = True) -> SQLOptions: - """Should DDL (Data Definition Language) commands be run? - - Examples of DDL commands include ``CREATE TABLE`` and ``DROP TABLE``. - - Args: - allow: Allow DDL commands to be run. - - Returns: - A new :py:class:`SQLOptions` object with the updated setting. - - Example usage:: - - options = SQLOptions().with_allow_ddl(True) - """ - self.options_internal = self.options_internal.with_allow_ddl(allow) - return self - - def with_allow_dml(self, allow: bool = True) -> SQLOptions: - """Should DML (Data Manipulation Language) commands be run? - - Examples of DML commands include ``INSERT INTO`` and ``DELETE``. - - Args: - allow: Allow DML commands to be run. - - Returns: - A new :py:class:`SQLOptions` object with the updated setting. 
- - Example usage:: - - options = SQLOptions().with_allow_dml(True) - """ - self.options_internal = self.options_internal.with_allow_dml(allow) - return self - - def with_allow_statements(self, allow: bool = True) -> SQLOptions: - """Should statements such as ``SET VARIABLE`` and ``BEGIN TRANSACTION`` be run? - - Args: - allow: Allow statements to be run. - - Returns: - A new :py:class:SQLOptions` object with the updated setting. - - Example usage:: - - options = SQLOptions().with_allow_statements(True) - """ - self.options_internal = self.options_internal.with_allow_statements(allow) - return self - - -class SessionContext: - """This is the main interface for executing queries and creating DataFrames. - - See :ref:`user_guide_concepts` in the online documentation for more information. - """ - - def __init__( - self, - config: SessionConfig | None = None, - runtime: RuntimeEnvBuilder | None = None, - ) -> None: - """Main interface for executing queries with DataFusion. - - Maintains the state of the connection between a user and an instance - of the connection between a user and an instance of the DataFusion - engine. - - Args: - config: Session configuration options. - runtime: Runtime configuration options. - - Example usage: - - The following example demonstrates how to use the context to execute - a query against a CSV data source using the :py:class:`DataFrame` API:: - - from datafusion import SessionContext - - ctx = SessionContext() - df = ctx.read_csv("data.csv") - """ - config = config.config_internal if config is not None else None - runtime = runtime.config_internal if runtime is not None else None - - self.ctx = SessionContextInternal(config, runtime) - - def __repr__(self) -> str: - """Print a string representation of the Session Context.""" - return self.ctx.__repr__() - - @classmethod - def global_ctx(cls) -> SessionContext: - """Retrieve the global context as a `SessionContext` wrapper. 
- - Returns: - A `SessionContext` object that wraps the global `SessionContextInternal`. - """ - internal_ctx = SessionContextInternal.global_ctx() - wrapper = cls() - wrapper.ctx = internal_ctx - return wrapper - - def enable_url_table(self) -> SessionContext: - """Control if local files can be queried as tables. - - Returns: - A new :py:class:`SessionContext` object with url table enabled. - """ - klass = self.__class__ - obj = klass.__new__(klass) - obj.ctx = self.ctx.enable_url_table() - return obj - - def register_object_store( - self, schema: str, store: Any, host: str | None = None - ) -> None: - """Add a new object store into the session. - - Args: - schema: The data source schema. - store: The :py:class:`~datafusion.object_store.ObjectStore` to register. - host: URL for the host. - """ - self.ctx.register_object_store(schema, store, host) - - def register_listing_table( - self, - name: str, - path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_extension: str = ".parquet", - schema: pa.Schema | None = None, - file_sort_order: Sequence[Sequence[SortKey]] | None = None, - ) -> None: - """Register multiple files as a single table. - - Registers a :py:class:`~datafusion.catalog.Table` that can assemble multiple - files from locations in an :py:class:`~datafusion.object_store.ObjectStore` - instance. - - Args: - name: Name of the resultant table. - path: Path to the file to register. - table_partition_cols: Partition columns. - file_extension: File extension of the provided table. - schema: The data source schema. - file_sort_order: Sort order for the file. Each sort key can be - specified as a column name (``str``), an expression - (``Expr``), or a ``SortExpr``. 
- """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - self.ctx.register_listing_table( - name, - str(path), - table_partition_cols, - file_extension, - schema, - self._convert_file_sort_order(file_sort_order), - ) - - def sql( - self, - query: str, - options: SQLOptions | None = None, - param_values: dict[str, Any] | None = None, - **named_params: Any, - ) -> DataFrame: - """Create a :py:class:`~datafusion.DataFrame` from SQL query text. - - See the online documentation for a description of how to perform - parameterized substitution via either the ``param_values`` option - or passing in ``named_params``. - - Note: This API implements DDL statements such as ``CREATE TABLE`` and - ``CREATE VIEW`` and DML statements such as ``INSERT INTO`` with in-memory - default implementation.See - :py:func:`~datafusion.context.SessionContext.sql_with_options`. - - Args: - query: SQL query text. - options: If provided, the query will be validated against these options. - param_values: Provides substitution of scalar values in the query - after parsing. - named_params: Provides string or DataFrame substitution in the query string. - - Returns: - DataFrame representation of the SQL query. 
- """ - - def value_to_scalar(value: Any) -> pa.Scalar: - if isinstance(value, pa.Scalar): - return value - return pa.scalar(value) - - def value_to_string(value: Any) -> str: - if isinstance(value, DataFrame): - view_name = str(uuid.uuid4()).replace("-", "_") - view_name = f"view_{view_name}" - view = value.df.into_view(temporary=True) - self.ctx.register_table(view_name, view) - return view_name - return str(value) - - param_values = ( - {name: value_to_scalar(value) for (name, value) in param_values.items()} - if param_values is not None - else {} - ) - param_strings = ( - {name: value_to_string(value) for (name, value) in named_params.items()} - if named_params is not None - else {} - ) - - options_raw = options.options_internal if options is not None else None - - return DataFrame( - self.ctx.sql_with_options( - query, - options=options_raw, - param_values=param_values, - param_strings=param_strings, - ) - ) - - def sql_with_options( - self, - query: str, - options: SQLOptions, - param_values: dict[str, Any] | None = None, - **named_params: Any, - ) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from SQL query text. - - This function will first validate that the query is allowed by the - provided options. - - Args: - query: SQL query text. - options: SQL options. - param_values: Provides substitution of scalar values in the query - after parsing. - named_params: Provides string or DataFrame substitution in the query string. - - Returns: - DataFrame representation of the SQL query. - """ - return self.sql( - query, options=options, param_values=param_values, **named_params - ) - - def create_dataframe( - self, - partitions: list[list[pa.RecordBatch]], - name: str | None = None, - schema: pa.Schema | None = None, - ) -> DataFrame: - """Create and return a dataframe using the provided partitions. - - Args: - partitions: :py:class:`pa.RecordBatch` partitions to register. - name: Resultant dataframe name. 
- schema: Schema for the partitions. - - Returns: - DataFrame representation of the SQL query. - """ - return DataFrame(self.ctx.create_dataframe(partitions, name, schema)) - - def create_dataframe_from_logical_plan(self, plan: LogicalPlan) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from an existing plan. - - Args: - plan: Logical plan. - - Returns: - DataFrame representation of the logical plan. - """ - return DataFrame(self.ctx.create_dataframe_from_logical_plan(plan._raw_plan)) - - def from_pylist( - self, data: list[dict[str, Any]], name: str | None = None - ) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from a list. - - Args: - data: List of dictionaries. - name: Name of the DataFrame. - - Returns: - DataFrame representation of the list of dictionaries. - """ - return DataFrame(self.ctx.from_pylist(data, name)) - - def from_pydict( - self, data: dict[str, list[Any]], name: str | None = None - ) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from a dictionary. - - Args: - data: Dictionary of lists. - name: Name of the DataFrame. - - Returns: - DataFrame representation of the dictionary of lists. - """ - return DataFrame(self.ctx.from_pydict(data, name)) - - def from_arrow( - self, - data: ArrowStreamExportable | ArrowArrayExportable, - name: str | None = None, - ) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source. - - The Arrow data source can be any object that implements either - ``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return - a struct array. - - Arrow data can be Polars, Pandas, Pyarrow etc. - - Args: - data: Arrow data source. - name: Name of the DataFrame. - - Returns: - DataFrame representation of the Arrow table. 
- """ - return DataFrame(self.ctx.from_arrow(data, name)) - - @deprecated("Use ``from_arrow`` instead.") - def from_arrow_table(self, data: pa.Table, name: str | None = None) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. - - This is an alias for :py:func:`from_arrow`. - """ - return self.from_arrow(data, name) - - def from_pandas(self, data: pd.DataFrame, name: str | None = None) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. - - Args: - data: Pandas DataFrame. - name: Name of the DataFrame. - - Returns: - DataFrame representation of the Pandas DataFrame. - """ - return DataFrame(self.ctx.from_pandas(data, name)) - - def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame: - """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. - - Args: - data: Polars DataFrame. - name: Name of the DataFrame. - - Returns: - DataFrame representation of the Polars DataFrame. - """ - return DataFrame(self.ctx.from_polars(data, name)) - - # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 - # is the discussion on how we arrived at adding register_view - def register_view(self, name: str, df: DataFrame) -> None: - """Register a :py:class:`~datafusion.dataframe.DataFrame` as a view. - - Args: - name (str): The name to register the view under. - df (DataFrame): The DataFrame to be converted into a view and registered. - """ - view = df.into_view() - self.ctx.register_table(name, view) - - def register_table( - self, - name: str, - table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset, - ) -> None: - """Register a :py:class:`~datafusion.Table` with this context. - - The registered table can be referenced from SQL statements executed against - this context. - - Args: - name: Name of the resultant table. - table: Any object that can be converted into a :class:`Table`. 
- """ - self.ctx.register_table(name, table) - - def deregister_table(self, name: str) -> None: - """Remove a table from the session.""" - self.ctx.deregister_table(name) - - def catalog_names(self) -> set[str]: - """Returns the list of catalogs in this context.""" - return self.ctx.catalog_names() - - def register_catalog_provider_list( - self, - provider: CatalogProviderListExportable | CatalogProviderList | CatalogList, - ) -> None: - """Register a catalog provider list.""" - if isinstance(provider, CatalogList): - self.ctx.register_catalog_provider_list(provider.catalog) - else: - self.ctx.register_catalog_provider_list(provider) - - def register_catalog_provider( - self, name: str, provider: CatalogProviderExportable | CatalogProvider | Catalog - ) -> None: - """Register a catalog provider.""" - if isinstance(provider, Catalog): - self.ctx.register_catalog_provider(name, provider.catalog) - else: - self.ctx.register_catalog_provider(name, provider) - - @deprecated("Use register_table() instead.") - def register_table_provider( - self, - name: str, - provider: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset, - ) -> None: - """Register a table provider. - - Deprecated: use :meth:`register_table` instead. - """ - self.register_table(name, provider) - - def register_udtf(self, func: TableFunction) -> None: - """Register a user defined table function.""" - self.ctx.register_udtf(func._udtf) - - def register_record_batches( - self, name: str, partitions: list[list[pa.RecordBatch]] - ) -> None: - """Register record batches as a table. - - This function will convert the provided partitions into a table and - register it into the session using the given name. - - Args: - name: Name of the resultant table. - partitions: Record batches to register as a table. 
- """ - self.ctx.register_record_batches(name, partitions) - - def register_parquet( - self, - name: str, - path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - parquet_pruning: bool = True, - file_extension: str = ".parquet", - skip_metadata: bool = True, - schema: pa.Schema | None = None, - file_sort_order: Sequence[Sequence[SortKey]] | None = None, - ) -> None: - """Register a Parquet file as a table. - - The registered table can be referenced from SQL statement executed - against this context. - - Args: - name: Name of the table to register. - path: Path to the Parquet file. - table_partition_cols: Partition columns. - parquet_pruning: Whether the parquet reader should use the - predicate to prune row groups. - file_extension: File extension; only files with this extension are - selected for data input. - skip_metadata: Whether the parquet reader should skip any metadata - that may be in the file schema. This can help avoid schema - conflicts due to metadata. - schema: The data source schema. - file_sort_order: Sort order for the file. Each sort key can be - specified as a column name (``str``), an expression - (``Expr``), or a ``SortExpr``. - """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - self.ctx.register_parquet( - name, - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - self._convert_file_sort_order(file_sort_order), - ) - - def register_csv( - self, - name: str, - path: str | pathlib.Path | list[str | pathlib.Path], - schema: pa.Schema | None = None, - has_header: bool = True, - delimiter: str = ",", - schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, - file_extension: str = ".csv", - file_compression_type: str | None = None, - options: CsvReadOptions | None = None, - ) -> None: - """Register a CSV file as a table. 
- - The registered table can be referenced from SQL statement executed against. - - Args: - name: Name of the table to register. - path: Path to the CSV file. It also accepts a list of Paths. - schema: An optional schema representing the CSV file. If None, the - CSV reader will try to infer it based on data in file. - has_header: Whether the CSV file have a header. If schema inference - is run on a file with no headers, default column names are - created. - delimiter: An optional column delimiter. - schema_infer_max_records: Maximum number of rows to read from CSV - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - file_compression_type: File compression type. - options: Set advanced options for CSV reading. This cannot be - combined with any of the other options in this method. - """ - path_arg = [str(p) for p in path] if isinstance(path, list) else str(path) - - if options is not None and ( - schema is not None - or not has_header - or delimiter != "," - or schema_infer_max_records != DEFAULT_MAX_INFER_SCHEMA - or file_extension != ".csv" - or file_compression_type is not None - ): - message = ( - "Combining CsvReadOptions parameter with additional options " - "is not supported. Use CsvReadOptions to set parameters." 
- ) - warnings.warn( - message, - category=UserWarning, - stacklevel=2, - ) - - options = ( - options - if options is not None - else CsvReadOptions( - schema=schema, - has_header=has_header, - delimiter=delimiter, - schema_infer_max_records=schema_infer_max_records, - file_extension=file_extension, - file_compression_type=file_compression_type, - ) - ) - - self.ctx.register_csv( - name, - path_arg, - options.to_inner(), - ) - - def register_json( - self, - name: str, - path: str | pathlib.Path, - schema: pa.Schema | None = None, - schema_infer_max_records: int = 1000, - file_extension: str = ".json", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_compression_type: str | None = None, - ) -> None: - """Register a JSON file as a table. - - The registered table can be referenced from SQL statement executed - against this context. - - Args: - name: Name of the table to register. - path: Path to the JSON file. - schema: The data source schema. - schema_infer_max_records: Maximum number of rows to read from JSON - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns. - file_compression_type: File compression type. - """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - self.ctx.register_json( - name, - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) - - def register_avro( - self, - name: str, - path: str | pathlib.Path, - schema: pa.Schema | None = None, - file_extension: str = ".avro", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - ) -> None: - """Register an Avro file as a table. - - The registered table can be referenced from SQL statement executed against - this context. - - Args: - name: Name of the table to register. 
- path: Path to the Avro file. - schema: The data source schema. - file_extension: File extension to select. - table_partition_cols: Partition columns. - """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - self.ctx.register_avro( - name, str(path), schema, file_extension, table_partition_cols - ) - - def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: - """Register a :py:class:`pa.dataset.Dataset` as a table. - - Args: - name: Name of the table to register. - dataset: PyArrow dataset. - """ - self.ctx.register_dataset(name, dataset) - - def register_udf(self, udf: ScalarUDF) -> None: - """Register a user-defined function (UDF) with the context.""" - self.ctx.register_udf(udf._udf) - - def register_udaf(self, udaf: AggregateUDF) -> None: - """Register a user-defined aggregation function (UDAF) with the context.""" - self.ctx.register_udaf(udaf._udaf) - - def register_udwf(self, udwf: WindowUDF) -> None: - """Register a user-defined window function (UDWF) with the context.""" - self.ctx.register_udwf(udwf._udwf) - - def catalog(self, name: str = "datafusion") -> Catalog: - """Retrieve a catalog by name.""" - return Catalog(self.ctx.catalog(name)) - - @deprecated( - "Use the catalog provider interface ``SessionContext.Catalog`` to " - "examine available catalogs, schemas and tables" - ) - def tables(self) -> set[str]: - """Deprecated.""" - return self.ctx.tables() - - def table(self, name: str) -> DataFrame: - """Retrieve a previously registered table by name.""" - return DataFrame(self.ctx.table(name)) - - def table_exist(self, name: str) -> bool: - """Return whether a table with the given name exists.""" - return self.ctx.table_exist(name) - - def empty_table(self) -> DataFrame: - """Create an empty :py:class:`~datafusion.dataframe.DataFrame`.""" - return DataFrame(self.ctx.empty_table()) - - def session_id(self) -> str: - """Return an id that 
uniquely identifies this :py:class:`SessionContext`.""" - return self.ctx.session_id() - - def read_json( - self, - path: str | pathlib.Path, - schema: pa.Schema | None = None, - schema_infer_max_records: int = 1000, - file_extension: str = ".json", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_compression_type: str | None = None, - ) -> DataFrame: - """Read a line-delimited JSON data source. - - Args: - path: Path to the JSON file. - schema: The data source schema. - schema_infer_max_records: Maximum number of rows to read from JSON - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns. - file_compression_type: File compression type. - - Returns: - DataFrame representation of the read JSON files. - """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - return DataFrame( - self.ctx.read_json( - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) - ) - - def read_csv( - self, - path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pa.Schema | None = None, - has_header: bool = True, - delimiter: str = ",", - schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, - file_extension: str = ".csv", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_compression_type: str | None = None, - options: CsvReadOptions | None = None, - ) -> DataFrame: - """Read a CSV data source. - - Args: - path: Path to the CSV file - schema: An optional schema representing the CSV files. If None, the - CSV reader will try to infer it based on data in file. - has_header: Whether the CSV file have a header. If schema inference - is run on a file with no headers, default column names are - created. 
- delimiter: An optional column delimiter. - schema_infer_max_records: Maximum number of rows to read from CSV - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns. - file_compression_type: File compression type. - options: Set advanced options for CSV reading. This cannot be - combined with any of the other options in this method. - - Returns: - DataFrame representation of the read CSV files - """ - path_arg = [str(p) for p in path] if isinstance(path, list) else str(path) - - if options is not None and ( - schema is not None - or not has_header - or delimiter != "," - or schema_infer_max_records != DEFAULT_MAX_INFER_SCHEMA - or file_extension != ".csv" - or table_partition_cols is not None - or file_compression_type is not None - ): - message = ( - "Combining CsvReadOptions parameter with additional options " - "is not supported. Use CsvReadOptions to set parameters." - ) - warnings.warn( - message, - category=UserWarning, - stacklevel=2, - ) - - options = ( - options - if options is not None - else CsvReadOptions( - schema=schema, - has_header=has_header, - delimiter=delimiter, - schema_infer_max_records=schema_infer_max_records, - file_extension=file_extension, - table_partition_cols=table_partition_cols, - file_compression_type=file_compression_type, - ) - ) - - return DataFrame( - self.ctx.read_csv( - path_arg, - options.to_inner(), - ) - ) - - def read_parquet( - self, - path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - parquet_pruning: bool = True, - file_extension: str = ".parquet", - skip_metadata: bool = True, - schema: pa.Schema | None = None, - file_sort_order: Sequence[Sequence[SortKey]] | None = None, - ) -> DataFrame: - """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. - - Args: - path: Path to the Parquet file. 
- table_partition_cols: Partition columns. - parquet_pruning: Whether the parquet reader should use the predicate - to prune row groups. - file_extension: File extension; only files with this extension are - selected for data input. - skip_metadata: Whether the parquet reader should skip any metadata - that may be in the file schema. This can help avoid schema - conflicts due to metadata. - schema: An optional schema representing the parquet files. If None, - the parquet reader will try to infer it based on data in the - file. - file_sort_order: Sort order for the file. Each sort key can be - specified as a column name (``str``), an expression - (``Expr``), or a ``SortExpr``. - - Returns: - DataFrame representation of the read Parquet files - """ - if table_partition_cols is None: - table_partition_cols = [] - table_partition_cols = _convert_table_partition_cols(table_partition_cols) - file_sort_order = self._convert_file_sort_order(file_sort_order) - return DataFrame( - self.ctx.read_parquet( - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - file_sort_order, - ) - ) - - def read_avro( - self, - path: str | pathlib.Path, - schema: pa.Schema | None = None, - file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_extension: str = ".avro", - ) -> DataFrame: - """Create a :py:class:`DataFrame` for reading Avro data source. - - Args: - path: Path to the Avro file. - schema: The data source schema. - file_partition_cols: Partition columns. - file_extension: File extension to select. 
- - Returns: - DataFrame representation of the read Avro file - """ - if file_partition_cols is None: - file_partition_cols = [] - file_partition_cols = _convert_table_partition_cols(file_partition_cols) - return DataFrame( - self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension) - ) - - def read_table( - self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset - ) -> DataFrame: - """Creates a :py:class:`~datafusion.dataframe.DataFrame` from a table.""" - return DataFrame(self.ctx.read_table(table)) - - def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: - """Execute the ``plan`` and return the results.""" - return RecordBatchStream(self.ctx.execute(plan._raw_plan, partitions)) - - @staticmethod - def _convert_file_sort_order( - file_sort_order: Sequence[Sequence[SortKey]] | None, - ) -> list[list[expr_internal.SortExpr]] | None: - """Convert nested ``SortKey`` sequences into raw sort expressions. - - Each ``SortKey`` can be a column name string, an ``Expr``, or a - ``SortExpr`` and will be converted using - :func:`datafusion.expr.sort_list_to_raw_sort_list`. - """ - # Convert each ``SortKey`` in the provided sort order to the low-level - # representation expected by the Rust bindings. - return ( - [sort_list_to_raw_sort_list(f) for f in file_sort_order] - if file_sort_order is not None - else None - ) - - @staticmethod - def _convert_table_partition_cols( - table_partition_cols: list[tuple[str, str | pa.DataType]], - ) -> list[tuple[str, pa.DataType]]: - warn = False - converted_table_partition_cols = [] - - for col, data_type in table_partition_cols: - if isinstance(data_type, str): - warn = True - if data_type == "string": - converted_data_type = pa.string() - elif data_type == "int": - converted_data_type = pa.int32() - else: - message = ( - f"Unsupported literal data type '{data_type}' for partition " - "column. 
Supported types are 'string' and 'int'" - ) - raise ValueError(message) - else: - converted_data_type = data_type - - converted_table_partition_cols.append((col, converted_data_type)) - - if warn: - message = ( - "using literals for table_partition_cols data types is deprecated," - "use pyarrow types instead" - ) - warnings.warn( - message, - category=DeprecationWarning, - stacklevel=2, - ) - - return converted_table_partition_cols - - def __datafusion_task_context_provider__(self) -> Any: - """Access the PyCapsule FFI_TaskContextProvider.""" - return self.ctx.__datafusion_task_context_provider__() - - def __datafusion_logical_extension_codec__(self) -> Any: - """Access the PyCapsule FFI_LogicalExtensionCodec.""" - return self.ctx.__datafusion_logical_extension_codec__() - - def with_logical_extension_codec(self, codec: Any) -> SessionContext: - """Create a new session context with specified codec. - - This only supports codecs that have been implemented using the - FFI interface. - """ - return self.ctx.with_logical_extension_codec(codec) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py deleted file mode 100644 index 214d44a42..000000000 --- a/python/datafusion/dataframe.py +++ /dev/null @@ -1,1441 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -""":py:class:`DataFrame` is one of the core concepts in DataFusion. - -See :ref:`user_guide_concepts` in the online documentation for more information. -""" - -from __future__ import annotations - -import warnings -from collections.abc import AsyncIterator, Iterable, Iterator, Sequence -from typing import ( - TYPE_CHECKING, - Any, - Literal, - overload, -) - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - -from datafusion._internal import DataFrame as DataFrameInternal -from datafusion._internal import DataFrameWriteOptions as DataFrameWriteOptionsInternal -from datafusion._internal import InsertOp as InsertOpInternal -from datafusion._internal import ParquetColumnOptions as ParquetColumnOptionsInternal -from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal -from datafusion.expr import ( - Expr, - SortExpr, - SortKey, - ensure_expr, - ensure_expr_list, - expr_list_to_raw_expr_list, - sort_list_to_raw_sort_list, -) -from datafusion.plan import ExecutionPlan, LogicalPlan -from datafusion.record_batch import RecordBatch, RecordBatchStream - -if TYPE_CHECKING: - import pathlib - from collections.abc import Callable - - import pandas as pd - import polars as pl - import pyarrow as pa - - from datafusion.catalog import Table - -from enum import Enum - - -# excerpt from deltalake -# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 -class Compression(Enum): - """Enum representing the available compression types for Parquet files.""" - - UNCOMPRESSED = "uncompressed" - SNAPPY = "snappy" - GZIP = "gzip" - BROTLI = "brotli" - LZ4 = "lz4" - # lzo is not implemented yet - # https://github.com/apache/arrow-rs/issues/6970 - # LZO = "lzo" # noqa: ERA001 - ZSTD = "zstd" - LZ4_RAW = "lz4_raw" - - @classmethod - def from_str(cls: 
type[Compression], value: str) -> Compression: - """Convert a string to a Compression enum value. - - Args: - value: The string representation of the compression type. - - Returns: - The Compression enum lowercase value. - - Raises: - ValueError: If the string does not match any Compression enum value. - """ - try: - return cls(value.lower()) - except ValueError as err: - valid_values = str([item.value for item in Compression]) - error_msg = f""" - {value} is not a valid Compression. - Valid values are: {valid_values} - """ - raise ValueError(error_msg) from err - - def get_default_level(self) -> int | None: - """Get the default compression level for the compression type. - - Returns: - The default compression level for the compression type. - """ - # GZIP, BROTLI default values from deltalake repo - # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 - # ZSTD default value from delta-rs - # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 - if self == Compression.GZIP: - return 6 - if self == Compression.BROTLI: - return 1 - if self == Compression.ZSTD: - return 4 - return None - - -class ParquetWriterOptions: - """Advanced parquet writer options. - - Allows settings the writer options that apply to the entire file. Some options can - also be set on a column by column basis, with the field ``column_specific_options`` - (see ``ParquetColumnOptions``). 
- """ - - def __init__( - self, - data_pagesize_limit: int = 1024 * 1024, - write_batch_size: int = 1024, - writer_version: str = "1.0", - skip_arrow_metadata: bool = False, - compression: str | None = "zstd(3)", - compression_level: int | None = None, - dictionary_enabled: bool | None = True, - dictionary_page_size_limit: int = 1024 * 1024, - statistics_enabled: str | None = "page", - max_row_group_size: int = 1024 * 1024, - created_by: str = "datafusion-python", - column_index_truncate_length: int | None = 64, - statistics_truncate_length: int | None = None, - data_page_row_count_limit: int = 20_000, - encoding: str | None = None, - bloom_filter_on_write: bool = False, - bloom_filter_fpp: float | None = None, - bloom_filter_ndv: int | None = None, - allow_single_file_parallelism: bool = True, - maximum_parallel_row_group_writers: int = 1, - maximum_buffered_record_batches_per_stream: int = 2, - column_specific_options: dict[str, ParquetColumnOptions] | None = None, - ) -> None: - """Initialize the ParquetWriterOptions. - - Args: - data_pagesize_limit: Sets best effort maximum size of data page in bytes. - write_batch_size: Sets write_batch_size in bytes. - writer_version: Sets parquet writer version. Valid values are ``1.0`` and - ``2.0``. - skip_arrow_metadata: Skip encoding the embedded arrow metadata in the - KV_meta. - compression: Compression type to use. Default is ``zstd(3)``. - Available compression types are - - - ``uncompressed``: No compression. - - ``snappy``: Snappy compression. - - ``gzip(n)``: Gzip compression with level n. - - ``brotli(n)``: Brotli compression with level n. - - ``lz4``: LZ4 compression. - - ``lz4_raw``: LZ4_RAW compression. - - ``zstd(n)``: Zstandard compression with level n. - compression_level: Compression level to set. - dictionary_enabled: Sets if dictionary encoding is enabled. If ``None``, - uses the default parquet writer setting. - dictionary_page_size_limit: Sets best effort maximum dictionary page size, - in bytes. 
- statistics_enabled: Sets if statistics are enabled for any column Valid - values are ``none``, ``chunk``, and ``page``. If ``None``, uses the - default parquet writer setting. - max_row_group_size: Target maximum number of rows in each row group - (defaults to 1M rows). Writing larger row groups requires more memory - to write, but can get better compression and be faster to read. - created_by: Sets "created by" property. - column_index_truncate_length: Sets column index truncate length. - statistics_truncate_length: Sets statistics truncate length. If ``None``, - uses the default parquet writer setting. - data_page_row_count_limit: Sets best effort maximum number of rows in a data - page. - encoding: Sets default encoding for any column. Valid values are ``plain``, - ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, - ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, - and ``byte_stream_split``. If ``None``, uses the default parquet writer - setting. - bloom_filter_on_write: Write bloom filters for all columns when creating - parquet files. - bloom_filter_fpp: Sets bloom filter false positive probability. If ``None``, - uses the default parquet writer setting - bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, - uses the default parquet writer setting. - allow_single_file_parallelism: Controls whether DataFusion will attempt to - speed up writing parquet files by serializing them in parallel. Each - column in each row group in each output file are serialized in parallel - leveraging a maximum possible core count of - ``n_files * n_row_groups * n_columns``. - maximum_parallel_row_group_writers: By default parallel parquet writer is - tuned for minimum memory usage in a streaming execution plan. 
You may - see a performance benefit when writing large parquet files by increasing - ``maximum_parallel_row_group_writers`` and - ``maximum_buffered_record_batches_per_stream`` if your system has idle - cores and can tolerate additional memory usage. Boosting these values is - likely worthwhile when writing out already in-memory data, such as from - a cached data frame. - maximum_buffered_record_batches_per_stream: See - ``maximum_parallel_row_group_writers``. - column_specific_options: Overrides options for specific columns. If a column - is not a part of this dictionary, it will use the parameters provided - here. - """ - self.data_pagesize_limit = data_pagesize_limit - self.write_batch_size = write_batch_size - self.writer_version = writer_version - self.skip_arrow_metadata = skip_arrow_metadata - if compression_level is not None: - self.compression = f"{compression}({compression_level})" - else: - self.compression = compression - self.dictionary_enabled = dictionary_enabled - self.dictionary_page_size_limit = dictionary_page_size_limit - self.statistics_enabled = statistics_enabled - self.max_row_group_size = max_row_group_size - self.created_by = created_by - self.column_index_truncate_length = column_index_truncate_length - self.statistics_truncate_length = statistics_truncate_length - self.data_page_row_count_limit = data_page_row_count_limit - self.encoding = encoding - self.bloom_filter_on_write = bloom_filter_on_write - self.bloom_filter_fpp = bloom_filter_fpp - self.bloom_filter_ndv = bloom_filter_ndv - self.allow_single_file_parallelism = allow_single_file_parallelism - self.maximum_parallel_row_group_writers = maximum_parallel_row_group_writers - self.maximum_buffered_record_batches_per_stream = ( - maximum_buffered_record_batches_per_stream - ) - self.column_specific_options = column_specific_options - - -class ParquetColumnOptions: - """Parquet options for individual columns. 
- - Contains the available options that can be applied for an individual Parquet column, - replacing the global options in ``ParquetWriterOptions``. - """ - - def __init__( - self, - encoding: str | None = None, - dictionary_enabled: bool | None = None, - compression: str | None = None, - statistics_enabled: str | None = None, - bloom_filter_enabled: bool | None = None, - bloom_filter_fpp: float | None = None, - bloom_filter_ndv: int | None = None, - ) -> None: - """Initialize the ParquetColumnOptions. - - Args: - encoding: Sets encoding for the column path. Valid values are: ``plain``, - ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, - ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, - and ``byte_stream_split``. These values are not case-sensitive. If - ``None``, uses the default parquet options - dictionary_enabled: Sets if dictionary encoding is enabled for the column - path. If `None`, uses the default parquet options - compression: Sets default parquet compression codec for the column path. - Valid values are ``uncompressed``, ``snappy``, ``gzip(level)``, ``lzo``, - ``brotli(level)``, ``lz4``, ``zstd(level)``, and ``lz4_raw``. These - values are not case-sensitive. If ``None``, uses the default parquet - options. - statistics_enabled: Sets if statistics are enabled for the column Valid - values are: ``none``, ``chunk``, and ``page`` These values are not case - sensitive. If ``None``, uses the default parquet options. - bloom_filter_enabled: Sets if bloom filter is enabled for the column path. - If ``None``, uses the default parquet options. - bloom_filter_fpp: Sets bloom filter false positive probability for the - column path. If ``None``, uses the default parquet options. - bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, - uses the default parquet options. 
- """ - self.encoding = encoding - self.dictionary_enabled = dictionary_enabled - self.compression = compression - self.statistics_enabled = statistics_enabled - self.bloom_filter_enabled = bloom_filter_enabled - self.bloom_filter_fpp = bloom_filter_fpp - self.bloom_filter_ndv = bloom_filter_ndv - - -class DataFrame: - """Two dimensional table representation of data. - - DataFrame objects are iterable; iterating over a DataFrame yields - :class:`datafusion.RecordBatch` instances lazily. - - See :ref:`user_guide_concepts` in the online documentation for more information. - """ - - def __init__(self, df: DataFrameInternal) -> None: - """This constructor is not to be used by the end user. - - See :py:class:`~datafusion.context.SessionContext` for methods to - create a :py:class:`DataFrame`. - """ - self.df = df - - def into_view(self, temporary: bool = False) -> Table: - """Convert ``DataFrame`` into a :class:`~datafusion.Table`. - - Examples: - >>> from datafusion import SessionContext - >>> ctx = SessionContext() - >>> df = ctx.sql("SELECT 1 AS value") - >>> view = df.into_view() - >>> ctx.register_table("values_view", view) - >>> result = ctx.sql("SELECT value FROM values_view").collect() - >>> result[0].column("value").to_pylist() - [1] - """ - from datafusion.catalog import Table as _Table - - return _Table(self.df.into_view(temporary)) - - def __getitem__(self, key: str | list[str]) -> DataFrame: - """Return a new :py:class:`DataFrame` with the specified column or columns. - - Args: - key: Column name or list of column names to select. - - Returns: - DataFrame with the specified column or columns. - """ - return DataFrame(self.df.__getitem__(key)) - - def __repr__(self) -> str: - """Return a string representation of the DataFrame. - - Returns: - String representation of the DataFrame. 
- """ - return self.df.__repr__() - - def _repr_html_(self) -> str: - return self.df._repr_html_() - - @staticmethod - def default_str_repr( - batches: list[pa.RecordBatch], - schema: pa.Schema, - has_more: bool, - table_uuid: str | None = None, - ) -> str: - """Return the default string representation of a DataFrame. - - This method is used by the default formatter and implemented in Rust for - performance reasons. - """ - return DataFrameInternal.default_str_repr(batches, schema, has_more, table_uuid) - - def describe(self) -> DataFrame: - """Return the statistics for this DataFrame. - - Only summarized numeric datatypes at the moments and returns nulls - for non-numeric datatypes. - - The output format is modeled after pandas. - - Returns: - A summary DataFrame containing statistics. - """ - return DataFrame(self.df.describe()) - - def schema(self) -> pa.Schema: - """Return the :py:class:`pyarrow.Schema` of this DataFrame. - - The output schema contains information on the name, data type, and - nullability for each column. - - Returns: - Describing schema of the DataFrame - """ - return self.df.schema() - - @deprecated( - "select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead" - ) - def select_columns(self, *args: str) -> DataFrame: - """Filter the DataFrame by columns. - - Returns: - DataFrame only containing the specified columns. - """ - return self.select(*args) - - def select_exprs(self, *args: str) -> DataFrame: - """Project arbitrary list of expression strings into a new DataFrame. - - This method will parse string expressions into logical plan expressions. - The output DataFrame has one column for each expression. - - Returns: - DataFrame only containing the specified columns. - """ - return self.df.select_exprs(*args) - - def select(self, *exprs: Expr | str) -> DataFrame: - """Project arbitrary expressions into a new :py:class:`DataFrame`. - - Args: - exprs: Either column names or :py:class:`~datafusion.expr.Expr` to select. 
- - Returns: - DataFrame after projection. It has one column for each expression. - - Example usage: - - The following example will return 3 columns from the original dataframe. - The first two columns will be the original column ``a`` and ``b`` since the - string "a" is assumed to refer to column selection. Also a duplicate of - column ``a`` will be returned with the column name ``alternate_a``:: - - df = df.select("a", col("b"), col("a").alias("alternate_a")) - - """ - exprs_internal = expr_list_to_raw_expr_list(exprs) - return DataFrame(self.df.select(*exprs_internal)) - - def drop(self, *columns: str) -> DataFrame: - """Drop arbitrary amount of columns. - - Column names are case-sensitive and require double quotes to be dropped - if the original name is not strictly lower case. - - Args: - columns: Column names to drop from the dataframe. - - Returns: - DataFrame with those columns removed in the projection. - - Example Usage:: - df.drop('a') # To drop a lower-cased column 'a' - df.drop('"a"') # To drop an upper-cased column 'A' - """ - return DataFrame(self.df.drop(*columns)) - - def filter(self, *predicates: Expr | str) -> DataFrame: - """Return a DataFrame for which ``predicate`` evaluates to ``True``. - - Rows for which ``predicate`` evaluates to ``False`` or ``None`` are filtered - out. If more than one predicate is provided, these predicates will be - combined as a logical AND. Each ``predicate`` can be an - :class:`~datafusion.expr.Expr` created using helper functions such as - :func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression string - that will be parsed against the DataFrame schema. If more complex logic is - required, see the logical operations in :py:mod:`~datafusion.functions`. - - Example:: - - from datafusion import col, lit - df.filter(col("a") > lit(1)) - df.filter("a > 1") - - Args: - predicates: Predicate expression(s) or SQL strings to filter the DataFrame. - - Returns: - DataFrame after filtering. 
- """ - df = self.df - for predicate in predicates: - expr = ( - self.parse_sql_expr(predicate) - if isinstance(predicate, str) - else predicate - ) - df = df.filter(ensure_expr(expr)) - return DataFrame(df) - - def parse_sql_expr(self, expr: str) -> Expr: - """Creates logical expression from a SQL query text. - - The expression is created and processed against the current schema. - - Example:: - - from datafusion import col, lit - df.parse_sql_expr("a > 1") - - should produce: - - col("a") > lit(1) - - Args: - expr: Expression string to be converted to datafusion expression - - Returns: - Logical expression . - """ - return Expr(self.df.parse_sql_expr(expr)) - - def with_column(self, name: str, expr: Expr | str) -> DataFrame: - """Add an additional column to the DataFrame. - - The ``expr`` must be an :class:`~datafusion.expr.Expr` constructed with - :func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression - string that will be parsed against the DataFrame schema. - - Example:: - - from datafusion import col, lit - df.with_column("b", col("a") + lit(1)) - - Args: - name: Name of the column to add. - expr: Expression to compute the column. - - Returns: - DataFrame with the new column. - """ - expr = self.parse_sql_expr(expr) if isinstance(expr, str) else expr - - return DataFrame(self.df.with_column(name, ensure_expr(expr))) - - def with_columns( - self, *exprs: Expr | str | Iterable[Expr | str], **named_exprs: Expr | str - ) -> DataFrame: - """Add columns to the DataFrame. - - By passing expressions, iterables of expressions, string SQL expressions, - or named expressions. - All expressions must be :class:`~datafusion.expr.Expr` objects created via - :func:`datafusion.col` or :func:`datafusion.lit`, or SQL expression strings. - To pass named expressions use the form ``name=Expr``. 
- - Example usage: The following will add 4 columns labeled ``a``, ``b``, ``c``, - and ``d``:: - - from datafusion import col, lit - df = df.with_columns( - col("x").alias("a"), - [lit(1).alias("b"), col("y").alias("c")], - d=lit(3) - ) - - Equivalent example using just SQL strings: - - df = df.with_columns( - "x as a", - ["1 as b", "y as c"], - d="3" - ) - - Args: - exprs: Either a single expression, an iterable of expressions to add or - SQL expression strings. - named_exprs: Named expressions in the form of ``name=expr`` - - Returns: - DataFrame with the new columns added. - """ - expressions = [] - for expr in exprs: - if isinstance(expr, str): - expressions.append(self.parse_sql_expr(expr).expr) - elif isinstance(expr, Iterable) and not isinstance( - expr, Expr | str | bytes | bytearray - ): - expressions.extend( - [ - self.parse_sql_expr(e).expr - if isinstance(e, str) - else ensure_expr(e) - for e in expr - ] - ) - else: - expressions.append(ensure_expr(expr)) - - for alias, expr in named_exprs.items(): - e = self.parse_sql_expr(expr) if isinstance(expr, str) else expr - ensure_expr(e) - expressions.append(e.alias(alias).expr) - - return DataFrame(self.df.with_columns(expressions)) - - def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: - r"""Rename one column by applying a new projection. - - This is a no-op if the column to be renamed does not exist. - - The method supports case sensitive rename with wrapping column name - into one the following symbols (" or ' or \`). - - Args: - old_name: Old column name. - new_name: New column name. - - Returns: - DataFrame with the column renamed. - """ - return DataFrame(self.df.with_column_renamed(old_name, new_name)) - - def aggregate( - self, - group_by: Sequence[Expr | str] | Expr | str, - aggs: Sequence[Expr] | Expr, - ) -> DataFrame: - """Aggregates the rows of the current DataFrame. - - Args: - group_by: Sequence of expressions or column names to group by. 
- aggs: Sequence of expressions to aggregate. - - Returns: - DataFrame after aggregation. - """ - group_by_list = ( - list(group_by) - if isinstance(group_by, Sequence) and not isinstance(group_by, Expr | str) - else [group_by] - ) - aggs_list = ( - list(aggs) - if isinstance(aggs, Sequence) and not isinstance(aggs, Expr) - else [aggs] - ) - - group_by_exprs = expr_list_to_raw_expr_list(group_by_list) - aggs_exprs = ensure_expr_list(aggs_list) - return DataFrame(self.df.aggregate(group_by_exprs, aggs_exprs)) - - def sort(self, *exprs: SortKey) -> DataFrame: - """Sort the DataFrame by the specified sorting expressions or column names. - - Note that any expression can be turned into a sort expression by - calling its ``sort`` method. - - Args: - exprs: Sort expressions or column names, applied in order. - - Returns: - DataFrame after sorting. - """ - exprs_raw = sort_list_to_raw_sort_list(exprs) - return DataFrame(self.df.sort(*exprs_raw)) - - def cast(self, mapping: dict[str, pa.DataType[Any]]) -> DataFrame: - """Cast one or more columns to a different data type. - - Args: - mapping: Mapped with column as key and column dtype as value. - - Returns: - DataFrame after casting columns - """ - exprs = [Expr.column(col).cast(dtype) for col, dtype in mapping.items()] - return self.with_columns(exprs) - - def limit(self, count: int, offset: int = 0) -> DataFrame: - """Return a new :py:class:`DataFrame` with a limited number of rows. - - Args: - count: Number of rows to limit the DataFrame to. - offset: Number of rows to skip. - - Returns: - DataFrame after limiting. - """ - return DataFrame(self.df.limit(count, offset)) - - def head(self, n: int = 5) -> DataFrame: - """Return a new :py:class:`DataFrame` with a limited number of rows. - - Args: - n: Number of rows to take from the head of the DataFrame. - - Returns: - DataFrame after limiting. 
- """ - return DataFrame(self.df.limit(n, 0)) - - def tail(self, n: int = 5) -> DataFrame: - """Return a new :py:class:`DataFrame` with a limited number of rows. - - Be aware this could be potentially expensive since the row size needs to be - determined of the dataframe. This is done by collecting it. - - Args: - n: Number of rows to take from the tail of the DataFrame. - - Returns: - DataFrame after limiting. - """ - return DataFrame(self.df.limit(n, max(0, self.count() - n))) - - def collect(self) -> list[pa.RecordBatch]: - """Execute this :py:class:`DataFrame` and collect results into memory. - - Prior to calling ``collect``, modifying a DataFrame simply updates a plan - (no actual computation is performed). Calling ``collect`` triggers the - computation. - - Returns: - List of :py:class:`pyarrow.RecordBatch` collected from the DataFrame. - """ - return self.df.collect() - - def collect_column(self, column_name: str) -> pa.Array | pa.ChunkedArray: - """Executes this :py:class:`DataFrame` for a single column.""" - return self.df.collect_column(column_name) - - def cache(self) -> DataFrame: - """Cache the DataFrame as a memory table. - - Returns: - Cached DataFrame. - """ - return DataFrame(self.df.cache()) - - def collect_partitioned(self) -> list[list[pa.RecordBatch]]: - """Execute this DataFrame and collect all partitioned results. - - This operation returns :py:class:`pyarrow.RecordBatch` maintaining the input - partitioning. - - Returns: - List of list of :py:class:`RecordBatch` collected from the - DataFrame. - """ - return self.df.collect_partitioned() - - def show(self, num: int = 20) -> None: - """Execute the DataFrame and print the result to the console. - - Args: - num: Number of lines to show. - """ - self.df.show(num) - - def distinct(self) -> DataFrame: - """Return a new :py:class:`DataFrame` with all duplicated rows removed. - - Returns: - DataFrame after removing duplicates. 
- """ - return DataFrame(self.df.distinct()) - - @overload - def join( - self, - right: DataFrame, - on: str | Sequence[str], - how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", - *, - left_on: None = None, - right_on: None = None, - join_keys: None = None, - coalesce_duplicate_keys: bool = True, - ) -> DataFrame: ... - - @overload - def join( - self, - right: DataFrame, - on: None = None, - how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", - *, - left_on: str | Sequence[str], - right_on: str | Sequence[str], - join_keys: tuple[list[str], list[str]] | None = None, - coalesce_duplicate_keys: bool = True, - ) -> DataFrame: ... - - @overload - def join( - self, - right: DataFrame, - on: None = None, - how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", - *, - join_keys: tuple[list[str], list[str]], - left_on: None = None, - right_on: None = None, - coalesce_duplicate_keys: bool = True, - ) -> DataFrame: ... - - def join( - self, - right: DataFrame, - on: str | Sequence[str] | tuple[list[str], list[str]] | None = None, - how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", - *, - left_on: str | Sequence[str] | None = None, - right_on: str | Sequence[str] | None = None, - join_keys: tuple[list[str], list[str]] | None = None, - coalesce_duplicate_keys: bool = True, - ) -> DataFrame: - """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`. - - `on` has to be provided or both `left_on` and `right_on` in conjunction. - - Args: - right: Other DataFrame to join with. - on: Column names to join on in both dataframes. - how: Type of join to perform. Supported types are "inner", "left", - "right", "full", "semi", "anti". - left_on: Join column of the left dataframe. - right_on: Join column of the right dataframe. - coalesce_duplicate_keys: When True, coalesce the columns - from the right DataFrame and left DataFrame - that have identical names in the ``on`` fields. 
- join_keys: Tuple of two lists of column names to join on. [Deprecated] - - Returns: - DataFrame after join. - """ - if join_keys is not None: - warnings.warn( - "`join_keys` is deprecated, use `on` or `left_on` with `right_on`", - category=DeprecationWarning, - stacklevel=2, - ) - left_on = join_keys[0] - right_on = join_keys[1] - - # This check is to prevent breaking API changes where users prior to - # DF 43.0.0 would pass the join_keys as a positional argument instead - # of a keyword argument. - if ( - isinstance(on, tuple) - and len(on) == 2 # noqa: PLR2004 - and isinstance(on[0], list) - and isinstance(on[1], list) - ): - # We know this is safe because we've checked the types - left_on = on[0] - right_on = on[1] - on = None - - if on is not None: - if left_on is not None or right_on is not None: - error_msg = "`left_on` or `right_on` should not provided with `on`" - raise ValueError(error_msg) - left_on = on - right_on = on - elif left_on is not None or right_on is not None: - if left_on is None or right_on is None: - error_msg = "`left_on` and `right_on` should both be provided." - raise ValueError(error_msg) - else: - error_msg = "either `on` or `left_on` and `right_on` should be provided." - raise ValueError(error_msg) - if isinstance(left_on, str): - left_on = [left_on] - if isinstance(right_on, str): - right_on = [right_on] - - return DataFrame( - self.df.join(right.df, how, left_on, right_on, coalesce_duplicate_keys) - ) - - def join_on( - self, - right: DataFrame, - *on_exprs: Expr, - how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", - ) -> DataFrame: - """Join two :py:class:`DataFrame` using the specified expressions. - - Join predicates must be :class:`~datafusion.expr.Expr` objects, typically - built with :func:`datafusion.col`. On expressions are used to support - in-equality predicates. Equality predicates are correctly optimized. 
- - Example:: - - from datafusion import col - df.join_on(other_df, col("id") == col("other_id")) - - Args: - right: Other DataFrame to join with. - on_exprs: single or multiple (in)-equality predicates. - how: Type of join to perform. Supported types are "inner", "left", - "right", "full", "semi", "anti". - - Returns: - DataFrame after join. - """ - exprs = [ensure_expr(expr) for expr in on_exprs] - return DataFrame(self.df.join_on(right.df, exprs, how)) - - def explain(self, verbose: bool = False, analyze: bool = False) -> None: - """Print an explanation of the DataFrame's plan so far. - - If ``analyze`` is specified, runs the plan and reports metrics. - - Args: - verbose: If ``True``, more details will be included. - analyze: If ``True``, the plan will run and metrics reported. - """ - self.df.explain(verbose, analyze) - - def logical_plan(self) -> LogicalPlan: - """Return the unoptimized ``LogicalPlan``. - - Returns: - Unoptimized logical plan. - """ - return LogicalPlan(self.df.logical_plan()) - - def optimized_logical_plan(self) -> LogicalPlan: - """Return the optimized ``LogicalPlan``. - - Returns: - Optimized logical plan. - """ - return LogicalPlan(self.df.optimized_logical_plan()) - - def execution_plan(self) -> ExecutionPlan: - """Return the execution/physical plan. - - Returns: - Execution plan. - """ - return ExecutionPlan(self.df.execution_plan()) - - def repartition(self, num: int) -> DataFrame: - """Repartition a DataFrame into ``num`` partitions. - - The batches allocation uses a round-robin algorithm. - - Args: - num: Number of partitions to repartition the DataFrame into. - - Returns: - Repartitioned DataFrame. - """ - return DataFrame(self.df.repartition(num)) - - def repartition_by_hash(self, *exprs: Expr | str, num: int) -> DataFrame: - """Repartition a DataFrame using a hash partitioning scheme. - - Args: - exprs: Expressions or a SQL expression string to evaluate - and perform hashing on. 
- num: Number of partitions to repartition the DataFrame into. - - Returns: - Repartitioned DataFrame. - """ - exprs = [self.parse_sql_expr(e) if isinstance(e, str) else e for e in exprs] - exprs = expr_list_to_raw_expr_list(exprs) - - return DataFrame(self.df.repartition_by_hash(*exprs, num=num)) - - def union(self, other: DataFrame, distinct: bool = False) -> DataFrame: - """Calculate the union of two :py:class:`DataFrame`. - - The two :py:class:`DataFrame` must have exactly the same schema. - - Args: - other: DataFrame to union with. - distinct: If ``True``, duplicate rows will be removed. - - Returns: - DataFrame after union. - """ - return DataFrame(self.df.union(other.df, distinct)) - - def union_distinct(self, other: DataFrame) -> DataFrame: - """Calculate the distinct union of two :py:class:`DataFrame`. - - The two :py:class:`DataFrame` must have exactly the same schema. - Any duplicate rows are discarded. - - Args: - other: DataFrame to union with. - - Returns: - DataFrame after union. - """ - return DataFrame(self.df.union_distinct(other.df)) - - def intersect(self, other: DataFrame) -> DataFrame: - """Calculate the intersection of two :py:class:`DataFrame`. - - The two :py:class:`DataFrame` must have exactly the same schema. - - Args: - other: DataFrame to intersect with. - - Returns: - DataFrame after intersection. - """ - return DataFrame(self.df.intersect(other.df)) - - def except_all(self, other: DataFrame) -> DataFrame: - """Calculate the exception of two :py:class:`DataFrame`. - - The two :py:class:`DataFrame` must have exactly the same schema. - - Args: - other: DataFrame to calculate exception with. - - Returns: - DataFrame after exception. - """ - return DataFrame(self.df.except_all(other.df)) - - def write_csv( - self, - path: str | pathlib.Path, - with_header: bool = False, - write_options: DataFrameWriteOptions | None = None, - ) -> None: - """Execute the :py:class:`DataFrame` and write the results to a CSV file. 
- - Args: - path: Path of the CSV file to write. - with_header: If true, output the CSV header row. - write_options: Options that impact how the DataFrame is written. - """ - raw_write_options = ( - write_options._raw_write_options if write_options is not None else None - ) - self.df.write_csv(str(path), with_header, raw_write_options) - - @overload - def write_parquet( - self, - path: str | pathlib.Path, - compression: str, - compression_level: int | None = None, - write_options: DataFrameWriteOptions | None = None, - ) -> None: ... - - @overload - def write_parquet( - self, - path: str | pathlib.Path, - compression: Compression = Compression.ZSTD, - compression_level: int | None = None, - write_options: DataFrameWriteOptions | None = None, - ) -> None: ... - - @overload - def write_parquet( - self, - path: str | pathlib.Path, - compression: ParquetWriterOptions, - compression_level: None = None, - write_options: DataFrameWriteOptions | None = None, - ) -> None: ... - - def write_parquet( - self, - path: str | pathlib.Path, - compression: str | Compression | ParquetWriterOptions = Compression.ZSTD, - compression_level: int | None = None, - write_options: DataFrameWriteOptions | None = None, - ) -> None: - """Execute the :py:class:`DataFrame` and write the results to a Parquet file. - - Available compression types are: - - - "uncompressed": No compression. - - "snappy": Snappy compression. - - "gzip": Gzip compression. - - "brotli": Brotli compression. - - "lz4": LZ4 compression. - - "lz4_raw": LZ4_RAW compression. - - "zstd": Zstandard compression. - - LZO compression is not yet implemented in arrow-rs and is therefore - excluded. - - Args: - path: Path of the Parquet file to write. - compression: Compression type to use. Default is "ZSTD". - compression_level: Compression level to use. For ZSTD, the - recommended range is 1 to 22, with the default being 4. Higher levels - provide better compression but slower speed. 
- write_options: Options that impact how the DataFrame is written. - """ - if isinstance(compression, ParquetWriterOptions): - if compression_level is not None: - msg = "compression_level should be None when using ParquetWriterOptions" - raise ValueError(msg) - self.write_parquet_with_options(path, compression) - return - - if isinstance(compression, str): - compression = Compression.from_str(compression) - - if ( - compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD} - and compression_level is None - ): - compression_level = compression.get_default_level() - - raw_write_options = ( - write_options._raw_write_options if write_options is not None else None - ) - self.df.write_parquet( - str(path), - compression.value, - compression_level, - raw_write_options, - ) - - def write_parquet_with_options( - self, - path: str | pathlib.Path, - options: ParquetWriterOptions, - write_options: DataFrameWriteOptions | None = None, - ) -> None: - """Execute the :py:class:`DataFrame` and write the results to a Parquet file. - - Allows advanced writer options to be set with `ParquetWriterOptions`. - - Args: - path: Path of the Parquet file to write. - options: Sets the writer parquet options (see `ParquetWriterOptions`). - write_options: Options that impact how the DataFrame is written. 
- """ - options_internal = ParquetWriterOptionsInternal( - options.data_pagesize_limit, - options.write_batch_size, - options.writer_version, - options.skip_arrow_metadata, - options.compression, - options.dictionary_enabled, - options.dictionary_page_size_limit, - options.statistics_enabled, - options.max_row_group_size, - options.created_by, - options.column_index_truncate_length, - options.statistics_truncate_length, - options.data_page_row_count_limit, - options.encoding, - options.bloom_filter_on_write, - options.bloom_filter_fpp, - options.bloom_filter_ndv, - options.allow_single_file_parallelism, - options.maximum_parallel_row_group_writers, - options.maximum_buffered_record_batches_per_stream, - ) - - column_specific_options_internal = {} - for column, opts in (options.column_specific_options or {}).items(): - column_specific_options_internal[column] = ParquetColumnOptionsInternal( - bloom_filter_enabled=opts.bloom_filter_enabled, - encoding=opts.encoding, - dictionary_enabled=opts.dictionary_enabled, - compression=opts.compression, - statistics_enabled=opts.statistics_enabled, - bloom_filter_fpp=opts.bloom_filter_fpp, - bloom_filter_ndv=opts.bloom_filter_ndv, - ) - - raw_write_options = ( - write_options._raw_write_options if write_options is not None else None - ) - self.df.write_parquet_with_options( - str(path), - options_internal, - column_specific_options_internal, - raw_write_options, - ) - - def write_json( - self, - path: str | pathlib.Path, - write_options: DataFrameWriteOptions | None = None, - ) -> None: - """Execute the :py:class:`DataFrame` and write the results to a JSON file. - - Args: - path: Path of the JSON file to write. - write_options: Options that impact how the DataFrame is written. 
- """ - raw_write_options = ( - write_options._raw_write_options if write_options is not None else None - ) - self.df.write_json(str(path), write_options=raw_write_options) - - def write_table( - self, table_name: str, write_options: DataFrameWriteOptions | None = None - ) -> None: - """Execute the :py:class:`DataFrame` and write the results to a table. - - The table must be registered with the session to perform this operation. - Not all table providers support writing operations. See the individual - implementations for details. - """ - raw_write_options = ( - write_options._raw_write_options if write_options is not None else None - ) - self.df.write_table(table_name, raw_write_options) - - def to_arrow_table(self) -> pa.Table: - """Execute the :py:class:`DataFrame` and convert it into an Arrow Table. - - Returns: - Arrow Table. - """ - return self.df.to_arrow_table() - - def execute_stream(self) -> RecordBatchStream: - """Executes this DataFrame and returns a stream over a single partition. - - Returns: - Record Batch Stream over a single partition. - """ - return RecordBatchStream(self.df.execute_stream()) - - def execute_stream_partitioned(self) -> list[RecordBatchStream]: - """Executes this DataFrame and returns a stream for each partition. - - Returns: - One record batch stream per partition. - """ - streams = self.df.execute_stream_partitioned() - return [RecordBatchStream(rbs) for rbs in streams] - - def to_pandas(self) -> pd.DataFrame: - """Execute the :py:class:`DataFrame` and convert it into a Pandas DataFrame. - - Returns: - Pandas DataFrame. - """ - return self.df.to_pandas() - - def to_pylist(self) -> list[dict[str, Any]]: - """Execute the :py:class:`DataFrame` and convert it into a list of dictionaries. - - Returns: - List of dictionaries. - """ - return self.df.to_pylist() - - def to_pydict(self) -> dict[str, list[Any]]: - """Execute the :py:class:`DataFrame` and convert it into a dictionary of lists. - - Returns: - Dictionary of lists. 
- """ - return self.df.to_pydict() - - def to_polars(self) -> pl.DataFrame: - """Execute the :py:class:`DataFrame` and convert it into a Polars DataFrame. - - Returns: - Polars DataFrame. - """ - return self.df.to_polars() - - def count(self) -> int: - """Return the total number of rows in this :py:class:`DataFrame`. - - Note that this method will actually run a plan to calculate the - count, which may be slow for large or complicated DataFrames. - - Returns: - Number of rows in the DataFrame. - """ - return self.df.count() - - @deprecated("Use :py:func:`unnest_columns` instead.") - def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame: - """See :py:func:`unnest_columns`.""" - return DataFrame(self.df.unnest_column(column, preserve_nulls=preserve_nulls)) - - def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFrame: - """Expand columns of arrays into a single row per array element. - - Args: - columns: Column names to perform unnest operation on. - preserve_nulls: If False, rows with null entries will not be - returned. - - Returns: - A DataFrame with the columns expanded. - """ - columns = list(columns) - return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) - - def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: - """Export the DataFrame as an Arrow C Stream. - - The DataFrame is executed using DataFusion's streaming APIs and exposed via - Arrow's C Stream interface. Record batches are produced incrementally, so the - full result set is never materialized in memory. - - When ``requested_schema`` is provided, DataFusion applies only simple - projections such as selecting a subset of existing columns or reordering - them. Column renaming, computed expressions, or type coercion are not - supported through this interface. 
- - Args: - requested_schema: Either a :py:class:`pyarrow.Schema` or an Arrow C - Schema capsule (``PyCapsule``) produced by - ``schema._export_to_c_capsule()``. The DataFrame will attempt to - align its output with the fields and order specified by this schema. - - Returns: - Arrow ``PyCapsule`` object representing an ``ArrowArrayStream``. - - For practical usage patterns, see the Apache Arrow streaming - documentation: https://arrow.apache.org/docs/python/ipc.html#streaming. - - For details on DataFusion's Arrow integration and DataFrame streaming, - see the user guide (user-guide/io/arrow and user-guide/dataframe/index). - - Notes: - The Arrow C Data Interface PyCapsule details are documented by Apache - Arrow and can be found at: - https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html - """ - # ``DataFrame.__arrow_c_stream__`` in the Rust extension leverages - # ``execute_stream_partitioned`` under the hood to stream batches while - # preserving the original partition order. - return self.df.__arrow_c_stream__(requested_schema) - - def __iter__(self) -> Iterator[RecordBatch]: - """Return an iterator over this DataFrame's record batches.""" - return iter(self.execute_stream()) - - def __aiter__(self) -> AsyncIterator[RecordBatch]: - """Return an async iterator over this DataFrame's record batches. - - We're using __aiter__ because we support Python < 3.10 where aiter() is not - available. - """ - return self.execute_stream().__aiter__() - - def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame: - """Apply a function to the current DataFrame which returns another DataFrame. - - This is useful for chaining together multiple functions. 
For example:: - - def add_3(df: DataFrame) -> DataFrame: - return df.with_column("modified", lit(3)) - - def within_limit(df: DataFrame, limit: int) -> DataFrame: - return df.filter(col("a") < lit(limit)).distinct() - - df = df.transform(modify_df).transform(within_limit, 4) - - Args: - func: A callable function that takes a DataFrame as it's first argument - args: Zero or more arguments to pass to `func` - - Returns: - DataFrame: After applying func to the original dataframe. - """ - return func(self, *args) - - def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame: - """Fill null values in specified columns with a value. - - Args: - value: Value to replace nulls with. Will be cast to match column type. - subset: Optional list of column names to fill. If None, fills all columns. - - Returns: - DataFrame with null values replaced where type casting is possible - - Examples: - >>> from datafusion import SessionContext, col - >>> ctx = SessionContext() - >>> df = ctx.from_pydict({"a": [1, None, 3], "b": [None, 5, 6]}) - >>> filled = df.fill_null(0) - >>> filled.sort(col("a")).collect()[0].column("a").to_pylist() - [0, 1, 3] - - Notes: - - Only fills nulls in columns where the value can be cast to the column type - - For columns where casting fails, the original column is kept unchanged - - For columns not in subset, the original column is kept unchanged - """ - return DataFrame(self.df.fill_null(value, subset)) - - -class InsertOp(Enum): - """Insert operation mode. - - These modes are used by the table writing feature to define how record - batches should be written to a table. - """ - - APPEND = InsertOpInternal.APPEND - """Appends new rows to the existing table without modifying any existing rows.""" - - REPLACE = InsertOpInternal.REPLACE - """Replace existing rows that collide with the inserted rows. - - Replacement is typically based on a unique key or primary key. 
- """ - - OVERWRITE = InsertOpInternal.OVERWRITE - """Overwrites all existing rows in the table with the new rows.""" - - -class DataFrameWriteOptions: - """Writer options for DataFrame. - - There is no guarantee the table provider supports all writer options. - See the individual implementation and documentation for details. - """ - - def __init__( - self, - insert_operation: InsertOp | None = None, - single_file_output: bool = False, - partition_by: str | Sequence[str] | None = None, - sort_by: Expr | SortExpr | Sequence[Expr] | Sequence[SortExpr] | None = None, - ) -> None: - """Instantiate writer options for DataFrame.""" - if isinstance(partition_by, str): - partition_by = [partition_by] - - sort_by_raw = sort_list_to_raw_sort_list(sort_by) - insert_op = insert_operation.value if insert_operation is not None else None - - self._raw_write_options = DataFrameWriteOptionsInternal( - insert_op, single_file_output, partition_by, sort_by_raw - ) diff --git a/python/datafusion/dataframe_formatter.py b/python/datafusion/dataframe_formatter.py deleted file mode 100644 index b8af45a1b..000000000 --- a/python/datafusion/dataframe_formatter.py +++ /dev/null @@ -1,843 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""HTML formatting utilities for DataFusion DataFrames.""" - -from __future__ import annotations - -import warnings -from typing import ( - TYPE_CHECKING, - Any, - Protocol, - runtime_checkable, -) - -from datafusion._internal import DataFrame as DataFrameInternal - -if TYPE_CHECKING: - from collections.abc import Callable - - -def _validate_positive_int(value: Any, param_name: str) -> None: - """Validate that a parameter is a positive integer. - - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - ValueError: If the value is not a positive integer - """ - if not isinstance(value, int) or value <= 0: - msg = f"{param_name} must be a positive integer" - raise ValueError(msg) - - -def _validate_bool(value: Any, param_name: str) -> None: - """Validate that a parameter is a boolean. - - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - TypeError: If the value is not a boolean - """ - if not isinstance(value, bool): - msg = f"{param_name} must be a boolean" - raise TypeError(msg) - - -def _validate_formatter_parameters( - max_cell_length: int, - max_width: int, - max_height: int, - max_memory_bytes: int, - min_rows: int, - max_rows: int | None, - repr_rows: int | None, - enable_cell_expansion: bool, - show_truncation_message: bool, - use_shared_styles: bool, - custom_css: str | None, - style_provider: Any, -) -> int: - """Validate all formatter parameters and return resolved max_rows value. 
- - Args: - max_cell_length: Maximum cell length value to validate - max_width: Maximum width value to validate - max_height: Maximum height value to validate - max_memory_bytes: Maximum memory bytes value to validate - min_rows: Minimum rows to display value to validate - max_rows: Maximum rows value to validate (None means use default) - repr_rows: Deprecated repr_rows value to validate - enable_cell_expansion: Boolean expansion flag to validate - show_truncation_message: Boolean message flag to validate - use_shared_styles: Boolean styles flag to validate - custom_css: Custom CSS string to validate - style_provider: Style provider object to validate - - Returns: - The resolved max_rows value after handling repr_rows deprecation - - Raises: - ValueError: If any numeric parameter is invalid or constraints are violated - TypeError: If any parameter has invalid type - DeprecationWarning: If repr_rows parameter is used - """ - # Validate numeric parameters - _validate_positive_int(max_cell_length, "max_cell_length") - _validate_positive_int(max_width, "max_width") - _validate_positive_int(max_height, "max_height") - _validate_positive_int(max_memory_bytes, "max_memory_bytes") - _validate_positive_int(min_rows, "min_rows") - - # Handle deprecated repr_rows parameter - if repr_rows is not None: - warnings.warn( - "repr_rows parameter is deprecated, use max_rows instead", - DeprecationWarning, - stacklevel=4, - ) - _validate_positive_int(repr_rows, "repr_rows") - if max_rows is not None and repr_rows != max_rows: - msg = "Cannot specify both repr_rows and max_rows; use max_rows only" - raise ValueError(msg) - max_rows = repr_rows - - # Use default if max_rows was not provided - if max_rows is None: - max_rows = 10 - - _validate_positive_int(max_rows, "max_rows") - - # Validate constraint: min_rows <= max_rows - if min_rows > max_rows: - msg = "min_rows must be less than or equal to max_rows" - raise ValueError(msg) - - # Validate boolean parameters - 
_validate_bool(enable_cell_expansion, "enable_cell_expansion") - _validate_bool(show_truncation_message, "show_truncation_message") - _validate_bool(use_shared_styles, "use_shared_styles") - - # Validate custom_css - if custom_css is not None and not isinstance(custom_css, str): - msg = "custom_css must be None or a string" - raise TypeError(msg) - - # Validate style_provider - if style_provider is not None and not isinstance(style_provider, StyleProvider): - msg = "style_provider must implement the StyleProvider protocol" - raise TypeError(msg) - - return max_rows - - -@runtime_checkable -class CellFormatter(Protocol): - """Protocol for cell value formatters.""" - - def __call__(self, value: Any) -> str: - """Format a cell value to string representation.""" - ... - - -@runtime_checkable -class StyleProvider(Protocol): - """Protocol for HTML style providers.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells.""" - ... - - def get_header_style(self) -> str: - """Get the CSS style for header cells.""" - ... - - -class DefaultStyleProvider: - """Default implementation of StyleProvider.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "white-space: nowrap;" - ) - - def get_header_style(self) -> str: - """Get the CSS style for header cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; " - "max-width: fit-content;" - ) - - -class DataFrameHtmlFormatter: - """Configurable HTML formatter for DataFusion DataFrames. - - This class handles the HTML rendering of DataFrames for display in - Jupyter notebooks and other rich display contexts. - - This class supports extension through composition. 
Key extension points: - - Provide a custom StyleProvider for styling cells and headers - - Register custom formatters for specific types - - Provide custom cell builders for specialized cell rendering - - Args: - max_cell_length: Maximum characters to display in a cell before truncation - max_width: Maximum width of the HTML table in pixels - max_height: Maximum height of the HTML table in pixels - max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) - min_rows: Minimum number of rows to display (must be <= max_rows) - max_rows: Maximum number of rows to display in repr output - repr_rows: Deprecated alias for max_rows - enable_cell_expansion: Whether to add expand/collapse buttons for long cell - values - custom_css: Additional CSS to include in the HTML output - show_truncation_message: Whether to display a message when data is truncated - style_provider: Custom provider for cell and header styles - use_shared_styles: Whether to load styles and scripts only once per notebook - session - """ - - def __init__( - self, - max_cell_length: int = 25, - max_width: int = 1000, - max_height: int = 300, - max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB - min_rows: int = 10, - max_rows: int | None = None, - repr_rows: int | None = None, - enable_cell_expansion: bool = True, - custom_css: str | None = None, - show_truncation_message: bool = True, - style_provider: StyleProvider | None = None, - use_shared_styles: bool = True, - ) -> None: - """Initialize the HTML formatter. - - Parameters - ---------- - max_cell_length - Maximum length of cell content before truncation. - max_width - Maximum width of the displayed table in pixels. - max_height - Maximum height of the displayed table in pixels. - max_memory_bytes - Maximum memory in bytes for rendered data. Helps prevent performance - issues with large datasets. - min_rows - Minimum number of rows to display even if memory limit is reached. - Must not exceed ``max_rows``. 
- max_rows - Maximum number of rows to display. Takes precedence over memory limits - when fewer rows are requested. - repr_rows - Deprecated alias for ``max_rows``. Use ``max_rows`` instead. - enable_cell_expansion - Whether to allow cells to expand when clicked. - custom_css - Custom CSS to apply to the HTML table. - show_truncation_message - Whether to show a message indicating that content has been truncated. - style_provider - Provider of CSS styles for the HTML table. If None, DefaultStyleProvider - is used. - use_shared_styles - Whether to use shared styles across multiple tables. This improves - performance when displaying many DataFrames in a single notebook. - - Raises: - ------ - ValueError - If max_cell_length, max_width, max_height, max_memory_bytes, - min_rows or max_rows is not a positive integer, or if min_rows - exceeds max_rows. - TypeError - If enable_cell_expansion, show_truncation_message, or use_shared_styles is - not a boolean, or if custom_css is provided but is not a string, or if - style_provider is provided but does not implement the StyleProvider - protocol. 
- """ - # Validate all parameters and get resolved max_rows - resolved_max_rows = _validate_formatter_parameters( - max_cell_length, - max_width, - max_height, - max_memory_bytes, - min_rows, - max_rows, - repr_rows, - enable_cell_expansion, - show_truncation_message, - use_shared_styles, - custom_css, - style_provider, - ) - - self.max_cell_length = max_cell_length - self.max_width = max_width - self.max_height = max_height - self.max_memory_bytes = max_memory_bytes - self.min_rows = min_rows - self._max_rows = resolved_max_rows - self.enable_cell_expansion = enable_cell_expansion - self.custom_css = custom_css - self.show_truncation_message = show_truncation_message - self.style_provider = style_provider or DefaultStyleProvider() - self.use_shared_styles = use_shared_styles - # Registry for custom type formatters - self._type_formatters: dict[type, CellFormatter] = {} - # Custom cell builders - self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None - self._custom_header_builder: Callable[[Any], str] | None = None - - @property - def max_rows(self) -> int: - """Get the maximum number of rows to display. - - Returns: - The maximum number of rows to display in repr output - """ - return self._max_rows - - @max_rows.setter - def max_rows(self, value: int) -> None: - """Set the maximum number of rows to display. - - Args: - value: The maximum number of rows - """ - self._max_rows = value - - @property - def repr_rows(self) -> int: - """Get the maximum number of rows (deprecated name). - - .. deprecated:: - Use :attr:`max_rows` instead. This property is provided for - backward compatibility. - - Returns: - The maximum number of rows to display - """ - return self._max_rows - - @repr_rows.setter - def repr_rows(self, value: int) -> None: - """Set the maximum number of rows using deprecated name. - - .. deprecated:: - Use :attr:`max_rows` setter instead. This property is provided for - backward compatibility. 
- - Args: - value: The maximum number of rows - """ - warnings.warn( - "repr_rows is deprecated, use max_rows instead", - DeprecationWarning, - stacklevel=2, - ) - self._max_rows = value - - def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: - """Register a custom formatter for a specific data type. - - Args: - type_class: The type to register a formatter for - formatter: Function that takes a value of the given type and returns - a formatted string - """ - self._type_formatters[type_class] = formatter - - def set_custom_cell_builder( - self, builder: Callable[[Any, int, int, str], str] - ) -> None: - """Set a custom cell builder function. - - Args: - builder: Function that takes (value, row, col, table_id) and returns HTML - """ - self._custom_cell_builder = builder - - def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: - """Set a custom header builder function. - - Args: - builder: Function that takes a field and returns HTML - """ - self._custom_header_builder = builder - - def format_html( - self, - batches: list, - schema: Any, - has_more: bool = False, - table_uuid: str | None = None, - ) -> str: - """Format record batches as HTML. - - This method is used by DataFrame's _repr_html_ implementation and can be - called directly when custom HTML rendering is needed. 
- - Args: - batches: List of Arrow RecordBatch objects - schema: Arrow Schema object - has_more: Whether there are more batches not shown - table_uuid: Unique ID for the table, used for JavaScript interactions - - Returns: - HTML string representation of the data - - Raises: - TypeError: If schema is invalid and no batches are provided - """ - if not batches: - return "No data to display" - - # Validate schema - if schema is None or not hasattr(schema, "__iter__"): - msg = "Schema must be provided" - raise TypeError(msg) - - # Generate a unique ID if none provided - table_uuid = table_uuid or f"df-{id(batches)}" - - # Build HTML components - html = [] - - html.extend(self._build_html_header()) - - html.extend(self._build_table_container_start()) - - # Add table header and body - html.extend(self._build_table_header(schema)) - html.extend(self._build_table_body(batches, table_uuid)) - - html.append("") - html.append("") - - # Add footer (JavaScript and messages) - if self.enable_cell_expansion: - html.append(self._get_javascript()) - - # Always add truncation message if needed (independent of styles) - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return "\n".join(html) - - def format_str( - self, - batches: list, - schema: Any, - has_more: bool = False, - table_uuid: str | None = None, - ) -> str: - """Format record batches as a string. - - This method is used by DataFrame's __repr__ implementation and can be - called directly when string rendering is needed. - - Args: - batches: List of Arrow RecordBatch objects - schema: Arrow Schema object - has_more: Whether there are more batches not shown - table_uuid: Unique ID for the table, used for JavaScript interactions - - Returns: - String representation of the data - - Raises: - TypeError: If schema is invalid and no batches are provided - """ - return DataFrameInternal.default_str_repr(batches, schema, has_more, table_uuid) - - def _build_html_header(self) -> list[str]: - """Build the HTML header with CSS styles.""" - default_css = self._get_default_css() if self.enable_cell_expansion else "" - script = f""" - -""" - html = [script] - if self.custom_css: - html.append(f"") - return html - - def _build_table_container_start(self) -> list[str]: - """Build the opening tags for the table container.""" - html = [] - html.append( - f'
' - ) - html.append('') - return html - - def _build_table_header(self, schema: Any) -> list[str]: - """Build the HTML table header with column names.""" - html = [] - html.append("") - html.append("") - for field in schema: - if self._custom_header_builder: - html.append(self._custom_header_builder(field)) - else: - html.append( - f"" - ) - html.append("") - html.append("") - return html - - def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: - """Build the HTML table body with data rows.""" - html = [] - html.append("") - - row_count = 0 - for batch in batches: - for row_idx in range(batch.num_rows): - row_count += 1 - html.append("") - - for col_idx, column in enumerate(batch.columns): - # Get the raw value from the column - raw_value = self._get_cell_value(column, row_idx) - - # Always check for type formatters first to format the value - formatted_value = self._format_cell_value(raw_value) - - # Then apply either custom cell builder or standard cell formatting - if self._custom_cell_builder: - # Pass both the raw value and formatted value to let the - # builder decide - cell_html = self._custom_cell_builder( - raw_value, row_count, col_idx, table_uuid - ) - html.append(cell_html) - else: - # Standard cell formatting with formatted value - if ( - len(str(raw_value)) > self.max_cell_length - and self.enable_cell_expansion - ): - cell_html = self._build_expandable_cell( - formatted_value, row_count, col_idx, table_uuid - ) - else: - cell_html = self._build_regular_cell(formatted_value) - html.append(cell_html) - - html.append("") - - html.append("") - return html - - def _get_cell_value(self, column: Any, row_idx: int) -> Any: - """Extract a cell value from a column. 
- - Args: - column: Arrow array - row_idx: Row index - - Returns: - The raw cell value - """ - try: - value = column[row_idx] - - if hasattr(value, "as_py"): - return value.as_py() - except (AttributeError, TypeError): - pass - else: - return value - - def _format_cell_value(self, value: Any) -> str: - """Format a cell value for display. - - Uses registered type formatters if available. - - Args: - value: The cell value to format - - Returns: - Formatted cell value as string - """ - # Check for custom type formatters - for type_cls, formatter in self._type_formatters.items(): - if isinstance(value, type_cls): - return formatter(value) - - # If no formatter matched, return string representation - return str(value) - - def _build_expandable_cell( - self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str - ) -> str: - """Build an expandable cell for long content.""" - short_value = str(formatted_value)[: self.max_cell_length] - return ( - f"" - ) - - def _build_regular_cell(self, formatted_value: str) -> str: - """Build a regular table cell.""" - return ( - f"" - ) - - def _build_html_footer(self, has_more: bool) -> list[str]: - """Build the HTML footer with JavaScript and messages.""" - html = [] - - # Add JavaScript for interactivity only if cell expansion is enabled - # and we're not using the shared styles approach - if self.enable_cell_expansion and not self.use_shared_styles: - html.append(self._get_javascript()) - - # Add truncation message if needed - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return html - - def _get_default_css(self) -> str: - """Get default CSS styles for the HTML table.""" - return """ - .expandable-container { - display: inline-block; - max-width: 200px; - } - .expandable { - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; - display: block; - } - .full-text { - display: none; - white-space: normal; - } - .expand-btn { - cursor: pointer; - color: blue; - text-decoration: underline; - border: none; - background: none; - font-size: inherit; - display: block; - margin-top: 5px; - } - """ - - def _get_javascript(self) -> str: - """Get JavaScript code for interactive elements.""" - return """ - -""" - - -class FormatterManager: - """Manager class for the global DataFrame HTML formatter instance.""" - - _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() - - @classmethod - def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. - - Args: - formatter: The formatter instance to use globally - """ - cls._default_formatter = formatter - _refresh_formatter_reference() - - @classmethod - def get_formatter(cls) -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - Returns: - The global HTML formatter instance - """ - return cls._default_formatter - - -def get_formatter() -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - This function is used by the DataFrame._repr_html_ implementation to access - the shared formatter instance. It can also be used directly when custom - HTML rendering is needed. - - Returns: - The global HTML formatter instance - - Example: - >>> from datafusion.html_formatter import get_formatter - >>> formatter = get_formatter() - >>> formatter.max_cell_length = 50 # Increase cell length - """ - return FormatterManager.get_formatter() - - -def set_formatter(formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. 
- - Args: - formatter: The formatter instance to use globally - - Example: - >>> from datafusion.html_formatter import get_formatter, set_formatter - >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) - >>> set_formatter(custom_formatter) - """ - FormatterManager.set_formatter(formatter) - - -def configure_formatter(**kwargs: Any) -> None: - """Configure the global DataFrame HTML formatter. - - This function creates a new formatter with the provided configuration - and sets it as the global formatter for all DataFrames. - - Args: - **kwargs: Formatter configuration parameters like max_cell_length, - max_width, max_height, enable_cell_expansion, etc. - - Raises: - ValueError: If any invalid parameters are provided - - Example: - >>> from datafusion.html_formatter import configure_formatter - >>> configure_formatter( - ... max_cell_length=50, - ... max_height=500, - ... enable_cell_expansion=True, - ... use_shared_styles=True - ... ) - """ - # Valid parameters accepted by DataFrameHtmlFormatter - valid_params = { - "max_cell_length", - "max_width", - "max_height", - "max_memory_bytes", - "min_rows", - "max_rows", - "repr_rows", - "enable_cell_expansion", - "custom_css", - "show_truncation_message", - "style_provider", - "use_shared_styles", - } - - # Check for invalid parameters - invalid_params = set(kwargs) - valid_params - if invalid_params: - msg = ( - f"Invalid formatter parameters: {', '.join(invalid_params)}. " - f"Valid parameters are: {', '.join(valid_params)}" - ) - raise ValueError(msg) - - # Create and set formatter with validated parameters - set_formatter(DataFrameHtmlFormatter(**kwargs)) - - -def reset_formatter() -> None: - """Reset the global DataFrame HTML formatter to default settings. - - This function creates a new formatter with default configuration - and sets it as the global formatter for all DataFrames. 
- - Example: - >>> from datafusion.html_formatter import reset_formatter - >>> reset_formatter() # Reset formatter to default settings - """ - formatter = DataFrameHtmlFormatter() - set_formatter(formatter) - - -def _refresh_formatter_reference() -> None: - """Refresh formatter reference in any modules using it. - - This helps ensure that changes to the formatter are reflected in existing - DataFrames that might be caching the formatter reference. - """ - # This is a no-op but signals modules to refresh their reference diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py deleted file mode 100644 index 5760b8948..000000000 --- a/python/datafusion/expr.py +++ /dev/null @@ -1,1430 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module supports expressions, one of the core concepts in DataFusion. - -See :ref:`Expressions` in the online documentation for more details. 
-""" - -# ruff: noqa: PLC0415 - -from __future__ import annotations - -from collections.abc import Iterable, Sequence -from typing import TYPE_CHECKING, Any, ClassVar - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - -import pyarrow as pa - -from ._internal import expr as expr_internal -from ._internal import functions as functions_internal - -if TYPE_CHECKING: - from collections.abc import Sequence - - from datafusion.common import ( # type: ignore[import] - DataTypeMap, - NullTreatment, - RexType, - ) - from datafusion.plan import LogicalPlan - - -# Standard error message for invalid expression types -# Mention both alias forms of column and literal helpers -EXPR_TYPE_ERROR = "Use col()/column() or lit()/literal() to construct expressions" - -# The following are imported from the internal representation. We may choose to -# give these all proper wrappers, or to simply leave as is. These were added -# in order to support passing the `test_imports` unit test. -# Tim Saucer note: It is not clear to me what the use case is for exposing -# these definitions to the end user. 
- -Alias = expr_internal.Alias -Analyze = expr_internal.Analyze -Aggregate = expr_internal.Aggregate -AggregateFunction = expr_internal.AggregateFunction -Between = expr_internal.Between -BinaryExpr = expr_internal.BinaryExpr -Case = expr_internal.Case -Cast = expr_internal.Cast -Column = expr_internal.Column -CopyTo = expr_internal.CopyTo -CreateCatalog = expr_internal.CreateCatalog -CreateCatalogSchema = expr_internal.CreateCatalogSchema -CreateExternalTable = expr_internal.CreateExternalTable -CreateFunction = expr_internal.CreateFunction -CreateFunctionBody = expr_internal.CreateFunctionBody -CreateIndex = expr_internal.CreateIndex -CreateMemoryTable = expr_internal.CreateMemoryTable -CreateView = expr_internal.CreateView -Deallocate = expr_internal.Deallocate -DescribeTable = expr_internal.DescribeTable -Distinct = expr_internal.Distinct -DmlStatement = expr_internal.DmlStatement -DropCatalogSchema = expr_internal.DropCatalogSchema -DropFunction = expr_internal.DropFunction -DropTable = expr_internal.DropTable -DropView = expr_internal.DropView -EmptyRelation = expr_internal.EmptyRelation -Execute = expr_internal.Execute -Exists = expr_internal.Exists -Explain = expr_internal.Explain -Extension = expr_internal.Extension -FileType = expr_internal.FileType -Filter = expr_internal.Filter -GroupingSet = expr_internal.GroupingSet -Join = expr_internal.Join -ILike = expr_internal.ILike -InList = expr_internal.InList -InSubquery = expr_internal.InSubquery -IsFalse = expr_internal.IsFalse -IsNotTrue = expr_internal.IsNotTrue -IsNull = expr_internal.IsNull -IsTrue = expr_internal.IsTrue -IsUnknown = expr_internal.IsUnknown -IsNotFalse = expr_internal.IsNotFalse -IsNotNull = expr_internal.IsNotNull -IsNotUnknown = expr_internal.IsNotUnknown -JoinConstraint = expr_internal.JoinConstraint -JoinType = expr_internal.JoinType -Like = expr_internal.Like -Limit = expr_internal.Limit -Literal = expr_internal.Literal -Negative = expr_internal.Negative -Not = expr_internal.Not 
-OperateFunctionArg = expr_internal.OperateFunctionArg -Partitioning = expr_internal.Partitioning -Placeholder = expr_internal.Placeholder -Prepare = expr_internal.Prepare -Projection = expr_internal.Projection -RecursiveQuery = expr_internal.RecursiveQuery -Repartition = expr_internal.Repartition -ScalarSubquery = expr_internal.ScalarSubquery -ScalarVariable = expr_internal.ScalarVariable -SetVariable = expr_internal.SetVariable -SimilarTo = expr_internal.SimilarTo -Sort = expr_internal.Sort -Subquery = expr_internal.Subquery -SubqueryAlias = expr_internal.SubqueryAlias -TableScan = expr_internal.TableScan -TransactionAccessMode = expr_internal.TransactionAccessMode -TransactionConclusion = expr_internal.TransactionConclusion -TransactionEnd = expr_internal.TransactionEnd -TransactionIsolationLevel = expr_internal.TransactionIsolationLevel -TransactionStart = expr_internal.TransactionStart -TryCast = expr_internal.TryCast -Union = expr_internal.Union -Unnest = expr_internal.Unnest -UnnestExpr = expr_internal.UnnestExpr -Values = expr_internal.Values -WindowExpr = expr_internal.WindowExpr - -__all__ = [ - "EXPR_TYPE_ERROR", - "Aggregate", - "AggregateFunction", - "Alias", - "Analyze", - "Between", - "BinaryExpr", - "Case", - "CaseBuilder", - "Cast", - "Column", - "CopyTo", - "CreateCatalog", - "CreateCatalogSchema", - "CreateExternalTable", - "CreateFunction", - "CreateFunctionBody", - "CreateIndex", - "CreateMemoryTable", - "CreateView", - "Deallocate", - "DescribeTable", - "Distinct", - "DmlStatement", - "DropCatalogSchema", - "DropFunction", - "DropTable", - "DropView", - "EmptyRelation", - "Execute", - "Exists", - "Explain", - "Expr", - "Extension", - "FileType", - "Filter", - "GroupingSet", - "ILike", - "InList", - "InSubquery", - "IsFalse", - "IsNotFalse", - "IsNotNull", - "IsNotTrue", - "IsNotUnknown", - "IsNull", - "IsTrue", - "IsUnknown", - "Join", - "JoinConstraint", - "JoinType", - "Like", - "Limit", - "Literal", - "Literal", - "Negative", - "Not", - 
"OperateFunctionArg", - "Partitioning", - "Placeholder", - "Prepare", - "Projection", - "RecursiveQuery", - "Repartition", - "ScalarSubquery", - "ScalarVariable", - "SetVariable", - "SimilarTo", - "Sort", - "SortExpr", - "SortKey", - "Subquery", - "SubqueryAlias", - "TableScan", - "TransactionAccessMode", - "TransactionConclusion", - "TransactionEnd", - "TransactionIsolationLevel", - "TransactionStart", - "TryCast", - "Union", - "Unnest", - "UnnestExpr", - "Values", - "Window", - "WindowExpr", - "WindowFrame", - "WindowFrameBound", - "ensure_expr", - "ensure_expr_list", -] - - -def ensure_expr(value: Expr | Any) -> expr_internal.Expr: - """Return the internal expression from ``Expr`` or raise ``TypeError``. - - This helper rejects plain strings and other non-:class:`Expr` values so - higher level APIs consistently require explicit :func:`~datafusion.col` or - :func:`~datafusion.lit` expressions. - - Args: - value: Candidate expression or other object. - - Returns: - The internal expression representation. - - Raises: - TypeError: If ``value`` is not an instance of :class:`Expr`. - """ - if not isinstance(value, Expr): - raise TypeError(EXPR_TYPE_ERROR) - return value.expr - - -def ensure_expr_list( - exprs: Iterable[Expr | Iterable[Expr]], -) -> list[expr_internal.Expr]: - """Flatten an iterable of expressions, validating each via ``ensure_expr``. - - Args: - exprs: Possibly nested iterable containing expressions. - - Returns: - A flat list of raw expressions. - - Raises: - TypeError: If any item is not an instance of :class:`Expr`. 
- """ - - def _iter( - items: Iterable[Expr | Iterable[Expr]], - ) -> Iterable[expr_internal.Expr]: - for expr in items: - if isinstance(expr, Iterable) and not isinstance( - expr, Expr | str | bytes | bytearray - ): - # Treat string-like objects as atomic to surface standard errors - yield from _iter(expr) - else: - yield ensure_expr(expr) - - return list(_iter(exprs)) - - -def _to_raw_expr(value: Expr | str) -> expr_internal.Expr: - """Convert a Python expression or column name to its raw variant. - - Args: - value: Candidate expression or column name. - - Returns: - The internal :class:`~datafusion._internal.expr.Expr` representation. - - Raises: - TypeError: If ``value`` is neither an :class:`Expr` nor ``str``. - """ - if isinstance(value, str): - return Expr.column(value).expr - if isinstance(value, Expr): - return value.expr - error = ( - "Expected Expr or column name, found:" - f" {type(value).__name__}. {EXPR_TYPE_ERROR}." - ) - raise TypeError(error) - - -def expr_list_to_raw_expr_list( - expr_list: list[Expr] | Expr | None, -) -> list[expr_internal.Expr] | None: - """Convert a sequence of expressions or column names to raw expressions.""" - if isinstance(expr_list, Expr | str): - expr_list = [expr_list] - if expr_list is None: - return None - return [_to_raw_expr(e) for e in expr_list] - - -def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: - """Helper function to return a default Sort if an Expr is provided.""" - if isinstance(e, SortExpr): - return e.raw_sort - return SortExpr(e, ascending=True, nulls_first=True).raw_sort - - -def sort_list_to_raw_sort_list( - sort_list: Sequence[SortKey] | SortKey | None, -) -> list[expr_internal.SortExpr] | None: - """Helper function to return an optional sort list to raw variant.""" - if isinstance(sort_list, Expr | SortExpr | str): - sort_list = [sort_list] - if sort_list is None: - return None - raw_sort_list = [] - for item in sort_list: - if isinstance(item, SortExpr): - 
raw_sort_list.append(sort_or_default(item)) - else: - raw_expr = _to_raw_expr(item) # may raise ``TypeError`` - raw_sort_list.append(sort_or_default(Expr(raw_expr))) - return raw_sort_list - - -class Expr: - """Expression object. - - Expressions are one of the core concepts in DataFusion. See - :ref:`Expressions` in the online documentation for more information. - """ - - def __init__(self, expr: expr_internal.RawExpr) -> None: - """This constructor should not be called by the end user.""" - self.expr = expr - - def to_variant(self) -> Any: - """Convert this expression into a python object if possible.""" - return self.expr.to_variant() - - @deprecated( - "display_name() is deprecated. Use :py:meth:`~Expr.schema_name` instead" - ) - def display_name(self) -> str: - """Returns the name of this expression as it should appear in a schema. - - This name will not include any CAST expressions. - """ - return self.schema_name() - - def schema_name(self) -> str: - """Returns the name of this expression as it should appear in a schema. - - This name will not include any CAST expressions. - """ - return self.expr.schema_name() - - def canonical_name(self) -> str: - """Returns a complete string representation of this expression.""" - return self.expr.canonical_name() - - def variant_name(self) -> str: - """Returns the name of the Expr variant. - - Ex: ``IsNotNull``, ``Literal``, ``BinaryExpr``, etc - """ - return self.expr.variant_name() - - def __richcmp__(self, other: Expr, op: int) -> Expr: - """Comparison operator.""" - return Expr(self.expr.__richcmp__(other.expr, op)) - - def __repr__(self) -> str: - """Generate a string representation of this expression.""" - return self.expr.__repr__() - - def __add__(self, rhs: Any) -> Expr: - """Addition operator. - - Accepts either an expression or any valid PyArrow scalar literal value. 
- """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__add__(rhs.expr)) - - def __sub__(self, rhs: Any) -> Expr: - """Subtraction operator. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__sub__(rhs.expr)) - - def __truediv__(self, rhs: Any) -> Expr: - """Division operator. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__truediv__(rhs.expr)) - - def __mul__(self, rhs: Any) -> Expr: - """Multiplication operator. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__mul__(rhs.expr)) - - def __mod__(self, rhs: Any) -> Expr: - """Modulo operator (%). - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__mod__(rhs.expr)) - - def __and__(self, rhs: Expr) -> Expr: - """Logical AND.""" - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__and__(rhs.expr)) - - def __or__(self, rhs: Expr) -> Expr: - """Logical OR.""" - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__or__(rhs.expr)) - - def __invert__(self) -> Expr: - """Binary not (~).""" - return Expr(self.expr.__invert__()) - - def __getitem__(self, key: str | int) -> Expr: - """Retrieve sub-object. - - If ``key`` is a string, returns the subfield of the struct. - If ``key`` is an integer, retrieves the element in the array. Note that the - element index begins at ``0``, unlike - :py:func:`~datafusion.functions.array_element` which begins at ``1``. - If ``key`` is a slice, returns an array that contains a slice of the - original array. 
Similar to integer indexing, this follows Python convention - where the index begins at ``0`` unlike - :py:func:`~datafusion.functions.array_slice` which begins at ``1``. - """ - if isinstance(key, int): - return Expr( - functions_internal.array_element(self.expr, Expr.literal(key + 1).expr) - ) - if isinstance(key, slice): - if isinstance(key.start, int): - start = Expr.literal(key.start + 1).expr - elif isinstance(key.start, Expr): - start = (key.start + Expr.literal(1)).expr - else: - # Default start at the first element, index 1 - start = Expr.literal(1).expr - - if isinstance(key.stop, int): - stop = Expr.literal(key.stop).expr - else: - stop = key.stop.expr - - if isinstance(key.step, int): - step = Expr.literal(key.step).expr - elif isinstance(key.step, Expr): - step = key.step.expr - else: - step = key.step - - return Expr(functions_internal.array_slice(self.expr, start, stop, step)) - return Expr(self.expr.__getitem__(key)) - - def __eq__(self, rhs: object) -> Expr: - """Equal to. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__eq__(rhs.expr)) - - def __ne__(self, rhs: object) -> Expr: - """Not equal to. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__ne__(rhs.expr)) - - def __ge__(self, rhs: Any) -> Expr: - """Greater than or equal to. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__ge__(rhs.expr)) - - def __gt__(self, rhs: Any) -> Expr: - """Greater than. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__gt__(rhs.expr)) - - def __le__(self, rhs: Any) -> Expr: - """Less than or equal to. 
- - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__le__(rhs.expr)) - - def __lt__(self, rhs: Any) -> Expr: - """Less than. - - Accepts either an expression or any valid PyArrow scalar literal value. - """ - if not isinstance(rhs, Expr): - rhs = Expr.literal(rhs) - return Expr(self.expr.__lt__(rhs.expr)) - - __radd__ = __add__ - __rand__ = __and__ - __rmod__ = __mod__ - __rmul__ = __mul__ - __ror__ = __or__ - __rsub__ = __sub__ - __rtruediv__ = __truediv__ - - @staticmethod - def literal(value: Any) -> Expr: - """Creates a new expression representing a scalar value. - - ``value`` must be a valid PyArrow scalar value or easily castable to one. - """ - if isinstance(value, str): - value = pa.scalar(value, type=pa.string_view()) - return Expr(expr_internal.RawExpr.literal(value)) - - @staticmethod - def literal_with_metadata(value: Any, metadata: dict[str, str]) -> Expr: - """Creates a new expression representing a scalar value with metadata. - - Args: - value: A valid PyArrow scalar value or easily castable to one. - metadata: Metadata to attach to the expression. - """ - if isinstance(value, str): - value = pa.scalar(value, type=pa.string_view()) - - return Expr(expr_internal.RawExpr.literal_with_metadata(value, metadata)) - - @staticmethod - def string_literal(value: str) -> Expr: - """Creates a new expression representing a UTF8 literal value. 
- - It is different from `literal` because it is pa.string() instead of - pa.string_view() - - This is needed for cases where DataFusion is expecting a UTF8 instead of - UTF8View literal, like in: - https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 - """ - if isinstance(value, str): - value = pa.scalar(value, type=pa.string()) - return Expr(expr_internal.RawExpr.literal(value)) - return Expr.literal(value) - - @staticmethod - def column(value: str) -> Expr: - """Creates a new expression representing a column.""" - return Expr(expr_internal.RawExpr.column(value)) - - def alias(self, name: str, metadata: dict[str, str] | None = None) -> Expr: - """Assign a name to the expression. - - Args: - name: The name to assign to the expression. - metadata: Optional metadata to attach to the expression. - - Returns: - A new expression with the assigned name. - """ - return Expr(self.expr.alias(name, metadata)) - - def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: - """Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. - - Args: - ascending: If true, sort in ascending order. - nulls_first: Return null values first. 
- """ - return SortExpr(self, ascending=ascending, nulls_first=nulls_first) - - def is_null(self) -> Expr: - """Returns ``True`` if this expression is null.""" - return Expr(self.expr.is_null()) - - def is_not_null(self) -> Expr: - """Returns ``True`` if this expression is not null.""" - return Expr(self.expr.is_not_null()) - - def fill_nan(self, value: Any | Expr | None = None) -> Expr: - """Fill NaN values with a provided value.""" - if not isinstance(value, Expr): - value = Expr.literal(value) - return Expr(functions_internal.nanvl(self.expr, value.expr)) - - def fill_null(self, value: Any | Expr | None = None) -> Expr: - """Fill NULL values with a provided value.""" - if not isinstance(value, Expr): - value = Expr.literal(value) - return Expr(functions_internal.nvl(self.expr, value.expr)) - - _to_pyarrow_types: ClassVar[dict[type, pa.DataType]] = { - float: pa.float64(), - int: pa.int64(), - str: pa.string(), - bool: pa.bool_(), - } - - def cast(self, to: pa.DataType[Any] | type) -> Expr: - """Cast to a new data type.""" - if not isinstance(to, pa.DataType): - try: - to = self._to_pyarrow_types[to] - except KeyError as err: - error_msg = "Expected instance of pyarrow.DataType or builtins.type" - raise TypeError(error_msg) from err - - return Expr(self.expr.cast(to)) - - def between(self, low: Any, high: Any, negated: bool = False) -> Expr: - """Returns ``True`` if this expression is between a given range. - - Args: - low: lower bound of the range (inclusive). - high: higher bound of the range (inclusive). - negated: negates whether the expression is between a given range - """ - if not isinstance(low, Expr): - low = Expr.literal(low) - - if not isinstance(high, Expr): - high = Expr.literal(high) - - return Expr(self.expr.between(low.expr, high.expr, negated=negated)) - - def rex_type(self) -> RexType: - """Return the Rex Type of this expression. 
- - A Rex (Row Expression) specifies a single row of data.That specification - could include user defined functions or types. RexType identifies the - row as one of the possible valid ``RexType``. - """ - return self.expr.rex_type() - - def types(self) -> DataTypeMap: - """Return the ``DataTypeMap``. - - Returns: - DataTypeMap which represents the PythonType, Arrow DataType, and - SqlType Enum which this expression represents. - """ - return self.expr.types() - - def python_value(self) -> Any: - """Extracts the Expr value into `Any`. - - This is only valid for literal expressions. - - Returns: - Python object representing literal value of the expression. - """ - return self.expr.python_value() - - def rex_call_operands(self) -> list[Expr]: - """Return the operands of the expression based on it's variant type. - - Row expressions, Rex(s), operate on the concept of operands. Different - variants of Expressions, Expr(s), store those operands in different - datastructures. This function examines the Expr variant and returns - the operands to the calling logic. - """ - return [Expr(e) for e in self.expr.rex_call_operands()] - - def rex_call_operator(self) -> str: - """Extracts the operator associated with a row expression type call.""" - return self.expr.rex_call_operator() - - def column_name(self, plan: LogicalPlan) -> str: - """Compute the output column name based on the provided logical plan.""" - return self.expr.column_name(plan._raw_plan) - - def order_by(self, *exprs: Expr | SortExpr) -> ExprFuncBuilder: - """Set the ordering for a window or aggregate function. - - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. 
- """ - return ExprFuncBuilder(self.expr.order_by([sort_or_default(e) for e in exprs])) - - def filter(self, filter: Expr) -> ExprFuncBuilder: - """Filter an aggregate function. - - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. - """ - return ExprFuncBuilder(self.expr.filter(filter.expr)) - - def distinct(self) -> ExprFuncBuilder: - """Only evaluate distinct values for an aggregate function. - - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. - """ - return ExprFuncBuilder(self.expr.distinct()) - - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: - """Set the treatment for ``null`` values for a window or aggregate function. - - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. - """ - return ExprFuncBuilder(self.expr.null_treatment(null_treatment.value)) - - def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: - """Set the partitioning for a window function. - - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. - """ - return ExprFuncBuilder(self.expr.partition_by([e.expr for e in partition_by])) - - def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: - """Set the frame fora window function. 
- - This function will create an :py:class:`ExprFuncBuilder` that can be used to - set parameters for either window or aggregate functions. If used on any other - type of expression, an error will be generated when ``build()`` is called. - """ - return ExprFuncBuilder(self.expr.window_frame(window_frame.window_frame)) - - def over(self, window: Window) -> Expr: - """Turn an aggregate function into a window function. - - This function turns any aggregate function into a window function. With the - exception of ``partition_by``, how each of the parameters is used is determined - by the underlying aggregate function. - - Args: - window: Window definition - """ - partition_by_raw = expr_list_to_raw_expr_list(window._partition_by) - order_by_raw = window._order_by - window_frame_raw = ( - window._window_frame.window_frame - if window._window_frame is not None - else None - ) - null_treatment_raw = ( - window._null_treatment.value if window._null_treatment is not None else None - ) - - return Expr( - self.expr.over( - partition_by=partition_by_raw, - order_by=order_by_raw, - window_frame=window_frame_raw, - null_treatment=null_treatment_raw, - ) - ) - - def asin(self) -> Expr: - """Returns the arc sine or inverse sine of a number.""" - from . import functions as F - - return F.asin(self) - - def array_pop_back(self) -> Expr: - """Returns the array without the last element.""" - from . import functions as F - - return F.array_pop_back(self) - - def reverse(self) -> Expr: - """Reverse the string argument.""" - from . import functions as F - - return F.reverse(self) - - def bit_length(self) -> Expr: - """Returns the number of bits in the string argument.""" - from . import functions as F - - return F.bit_length(self) - - def array_length(self) -> Expr: - """Returns the length of the array.""" - from . import functions as F - - return F.array_length(self) - - def array_ndims(self) -> Expr: - """Returns the number of dimensions of the array.""" - from . 
import functions as F - - return F.array_ndims(self) - - def to_hex(self) -> Expr: - """Converts an integer to a hexadecimal string.""" - from . import functions as F - - return F.to_hex(self) - - def array_dims(self) -> Expr: - """Returns an array of the array's dimensions.""" - from . import functions as F - - return F.array_dims(self) - - def from_unixtime(self) -> Expr: - """Converts an integer to RFC3339 timestamp format string.""" - from . import functions as F - - return F.from_unixtime(self) - - def array_empty(self) -> Expr: - """Returns a boolean indicating whether the array is empty.""" - from . import functions as F - - return F.array_empty(self) - - def sin(self) -> Expr: - """Returns the sine of the argument.""" - from . import functions as F - - return F.sin(self) - - def log10(self) -> Expr: - """Base 10 logarithm of the argument.""" - from . import functions as F - - return F.log10(self) - - def initcap(self) -> Expr: - """Set the initial letter of each word to capital. - - Converts the first letter of each word in ``string`` to uppercase and the - remaining characters to lowercase. - """ - from . import functions as F - - return F.initcap(self) - - def list_distinct(self) -> Expr: - """Returns distinct values from the array after removing duplicates. - - This is an alias for :py:func:`array_distinct`. - """ - from . import functions as F - - return F.list_distinct(self) - - def iszero(self) -> Expr: - """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" - from . import functions as F - - return F.iszero(self) - - def array_distinct(self) -> Expr: - """Returns distinct values from the array after removing duplicates.""" - from . import functions as F - - return F.array_distinct(self) - - def arrow_typeof(self) -> Expr: - """Returns the Arrow type of the expression.""" - from . import functions as F - - return F.arrow_typeof(self) - - def length(self) -> Expr: - """The number of characters in the ``string``.""" - from . 
import functions as F - - return F.length(self) - - def lower(self) -> Expr: - """Converts a string to lowercase.""" - from . import functions as F - - return F.lower(self) - - def acos(self) -> Expr: - """Returns the arc cosine or inverse cosine of a number. - - Returns: - -------- - Expr - A new expression representing the arc cosine of the input expression. - """ - from . import functions as F - - return F.acos(self) - - def ascii(self) -> Expr: - """Returns the numeric code of the first character of the argument.""" - from . import functions as F - - return F.ascii(self) - - def sha384(self) -> Expr: - """Computes the SHA-384 hash of a binary string.""" - from . import functions as F - - return F.sha384(self) - - def isnan(self) -> Expr: - """Returns true if a given number is +NaN or -NaN otherwise returns false.""" - from . import functions as F - - return F.isnan(self) - - def degrees(self) -> Expr: - """Converts the argument from radians to degrees.""" - from . import functions as F - - return F.degrees(self) - - def cardinality(self) -> Expr: - """Returns the total number of elements in the array.""" - from . import functions as F - - return F.cardinality(self) - - def sha224(self) -> Expr: - """Computes the SHA-224 hash of a binary string.""" - from . import functions as F - - return F.sha224(self) - - def asinh(self) -> Expr: - """Returns inverse hyperbolic sine.""" - from . import functions as F - - return F.asinh(self) - - def flatten(self) -> Expr: - """Flattens an array of arrays into a single array.""" - from . import functions as F - - return F.flatten(self) - - def exp(self) -> Expr: - """Returns the exponential of the argument.""" - from . import functions as F - - return F.exp(self) - - def abs(self) -> Expr: - """Return the absolute value of a given number. - - Returns: - -------- - Expr - A new expression representing the absolute value of the input expression. - """ - from . 
import functions as F - - return F.abs(self) - - def btrim(self) -> Expr: - """Removes all characters, spaces by default, from both sides of a string.""" - from . import functions as F - - return F.btrim(self) - - def md5(self) -> Expr: - """Computes an MD5 128-bit checksum for a string expression.""" - from . import functions as F - - return F.md5(self) - - def octet_length(self) -> Expr: - """Returns the number of bytes of a string.""" - from . import functions as F - - return F.octet_length(self) - - def cosh(self) -> Expr: - """Returns the hyperbolic cosine of the argument.""" - from . import functions as F - - return F.cosh(self) - - def radians(self) -> Expr: - """Converts the argument from degrees to radians.""" - from . import functions as F - - return F.radians(self) - - def sqrt(self) -> Expr: - """Returns the square root of the argument.""" - from . import functions as F - - return F.sqrt(self) - - def character_length(self) -> Expr: - """Returns the number of characters in the argument.""" - from . import functions as F - - return F.character_length(self) - - def tanh(self) -> Expr: - """Returns the hyperbolic tangent of the argument.""" - from . import functions as F - - return F.tanh(self) - - def atan(self) -> Expr: - """Returns inverse tangent of a number.""" - from . import functions as F - - return F.atan(self) - - def rtrim(self) -> Expr: - """Removes all characters, spaces by default, from the end of a string.""" - from . import functions as F - - return F.rtrim(self) - - def atanh(self) -> Expr: - """Returns inverse hyperbolic tangent.""" - from . import functions as F - - return F.atanh(self) - - def list_dims(self) -> Expr: - """Returns an array of the array's dimensions. - - This is an alias for :py:func:`array_dims`. - """ - from . import functions as F - - return F.list_dims(self) - - def sha256(self) -> Expr: - """Computes the SHA-256 hash of a binary string.""" - from . 
import functions as F - - return F.sha256(self) - - def factorial(self) -> Expr: - """Returns the factorial of the argument.""" - from . import functions as F - - return F.factorial(self) - - def acosh(self) -> Expr: - """Returns inverse hyperbolic cosine.""" - from . import functions as F - - return F.acosh(self) - - def floor(self) -> Expr: - """Returns the nearest integer less than or equal to the argument.""" - from . import functions as F - - return F.floor(self) - - def ceil(self) -> Expr: - """Returns the nearest integer greater than or equal to argument.""" - from . import functions as F - - return F.ceil(self) - - def list_length(self) -> Expr: - """Returns the length of the array. - - This is an alias for :py:func:`array_length`. - """ - from . import functions as F - - return F.list_length(self) - - def upper(self) -> Expr: - """Converts a string to uppercase.""" - from . import functions as F - - return F.upper(self) - - def chr(self) -> Expr: - """Converts the Unicode code point to a UTF8 character.""" - from . import functions as F - - return F.chr(self) - - def ln(self) -> Expr: - """Returns the natural logarithm (base e) of the argument.""" - from . import functions as F - - return F.ln(self) - - def tan(self) -> Expr: - """Returns the tangent of the argument.""" - from . import functions as F - - return F.tan(self) - - def array_pop_front(self) -> Expr: - """Returns the array without the first element.""" - from . import functions as F - - return F.array_pop_front(self) - - def cbrt(self) -> Expr: - """Returns the cube root of a number.""" - from . import functions as F - - return F.cbrt(self) - - def sha512(self) -> Expr: - """Computes the SHA-512 hash of a binary string.""" - from . import functions as F - - return F.sha512(self) - - def char_length(self) -> Expr: - """The number of characters in the ``string``.""" - from . 
import functions as F - - return F.char_length(self) - - def list_ndims(self) -> Expr: - """Returns the number of dimensions of the array. - - This is an alias for :py:func:`array_ndims`. - """ - from . import functions as F - - return F.list_ndims(self) - - def trim(self) -> Expr: - """Removes all characters, spaces by default, from both sides of a string.""" - from . import functions as F - - return F.trim(self) - - def cos(self) -> Expr: - """Returns the cosine of the argument.""" - from . import functions as F - - return F.cos(self) - - def sinh(self) -> Expr: - """Returns the hyperbolic sine of the argument.""" - from . import functions as F - - return F.sinh(self) - - def empty(self) -> Expr: - """This is an alias for :py:func:`array_empty`.""" - from . import functions as F - - return F.empty(self) - - def ltrim(self) -> Expr: - """Removes all characters, spaces by default, from the beginning of a string.""" - from . import functions as F - - return F.ltrim(self) - - def signum(self) -> Expr: - """Returns the sign of the argument (-1, 0, +1).""" - from . import functions as F - - return F.signum(self) - - def log2(self) -> Expr: - """Base 2 logarithm of the argument.""" - from . import functions as F - - return F.log2(self) - - def cot(self) -> Expr: - """Returns the cotangent of the argument.""" - from . import functions as F - - return F.cot(self) - - -class ExprFuncBuilder: - def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None: - self.builder = builder - - def order_by(self, *exprs: Expr) -> ExprFuncBuilder: - """Set the ordering for a window or aggregate function. - - Values given in ``exprs`` must be sort expressions. You can convert any other - expression to a sort expression using `.sort()`. 
- """ - return ExprFuncBuilder( - self.builder.order_by([sort_or_default(e) for e in exprs]) - ) - - def filter(self, filter: Expr) -> ExprFuncBuilder: - """Filter values during aggregation.""" - return ExprFuncBuilder(self.builder.filter(filter.expr)) - - def distinct(self) -> ExprFuncBuilder: - """Only evaluate distinct values during aggregation.""" - return ExprFuncBuilder(self.builder.distinct()) - - def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: - """Set how nulls are treated for either window or aggregate functions.""" - return ExprFuncBuilder(self.builder.null_treatment(null_treatment.value)) - - def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: - """Set partitioning for window functions.""" - return ExprFuncBuilder( - self.builder.partition_by([e.expr for e in partition_by]) - ) - - def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: - """Set window frame for window functions.""" - return ExprFuncBuilder(self.builder.window_frame(window_frame.window_frame)) - - def build(self) -> Expr: - """Create an expression from a Function Builder.""" - return Expr(self.builder.build()) - - -class Window: - """Define reusable window parameters.""" - - def __init__( - self, - partition_by: list[Expr] | Expr | None = None, - window_frame: WindowFrame | None = None, - order_by: list[SortExpr | Expr | str] | Expr | SortExpr | str | None = None, - null_treatment: NullTreatment | None = None, - ) -> None: - """Construct a window definition. 
- - Args: - partition_by: Partitions for window operation - window_frame: Define the start and end bounds of the window frame - order_by: Set ordering - null_treatment: Indicate how nulls are to be treated - """ - self._partition_by = partition_by - self._window_frame = window_frame - self._order_by = sort_list_to_raw_sort_list(order_by) - self._null_treatment = null_treatment - - -class WindowFrame: - """Defines a window frame for performing window operations.""" - - def __init__( - self, units: str, start_bound: Any | None, end_bound: Any | None - ) -> None: - """Construct a window frame using the given parameters. - - Args: - units: Should be one of ``rows``, ``range``, or ``groups``. - start_bound: Sets the preceding bound. Must be >= 0. If none, this - will be set to unbounded. If unit type is ``groups``, this - parameter must be set. - end_bound: Sets the following bound. Must be >= 0. If none, this - will be set to unbounded. If unit type is ``groups``, this - parameter must be set. 
- """ - if not isinstance(start_bound, pa.Scalar) and start_bound is not None: - start_bound = pa.scalar(start_bound) - if units in ("rows", "groups"): - start_bound = start_bound.cast(pa.uint64()) - if not isinstance(end_bound, pa.Scalar) and end_bound is not None: - end_bound = pa.scalar(end_bound) - if units in ("rows", "groups"): - end_bound = end_bound.cast(pa.uint64()) - self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound) - - def __repr__(self) -> str: - """Print a string representation of the window frame.""" - return self.window_frame.__repr__() - - def get_frame_units(self) -> str: - """Returns the window frame units for the bounds.""" - return self.window_frame.get_frame_units() - - def get_lower_bound(self) -> WindowFrameBound: - """Returns starting bound.""" - return WindowFrameBound(self.window_frame.get_lower_bound()) - - def get_upper_bound(self) -> WindowFrameBound: - """Returns end bound.""" - return WindowFrameBound(self.window_frame.get_upper_bound()) - - -class WindowFrameBound: - """Defines a single window frame bound. - - :py:class:`WindowFrame` typically requires a start and end bound. 
- """ - - def __init__(self, frame_bound: expr_internal.WindowFrameBound) -> None: - """Constructs a window frame bound.""" - self.frame_bound = frame_bound - - def get_offset(self) -> int | None: - """Returns the offset of the window frame.""" - return self.frame_bound.get_offset() - - def is_current_row(self) -> bool: - """Returns if the frame bound is current row.""" - return self.frame_bound.is_current_row() - - def is_following(self) -> bool: - """Returns if the frame bound is following.""" - return self.frame_bound.is_following() - - def is_preceding(self) -> bool: - """Returns if the frame bound is preceding.""" - return self.frame_bound.is_preceding() - - def is_unbounded(self) -> bool: - """Returns if the frame bound is unbounded.""" - return self.frame_bound.is_unbounded() - - -class CaseBuilder: - """Builder class for constructing case statements. - - An example usage would be as follows:: - - import datafusion.functions as f - from datafusion import lit, col - df.select( - f.case(col("column_a")) - .when(lit(1), lit("One")) - .when(lit(2), lit("Two")) - .otherwise(lit("Unknown")) - ) - """ - - def __init__(self, case_builder: expr_internal.CaseBuilder) -> None: - """Constructs a case builder. - - This is not typically called by the end user directly. See - :py:func:`datafusion.functions.case` instead. - """ - self.case_builder = case_builder - - def when(self, when_expr: Expr, then_expr: Expr) -> CaseBuilder: - """Add a case to match against.""" - return CaseBuilder(self.case_builder.when(when_expr.expr, then_expr.expr)) - - def otherwise(self, else_expr: Expr) -> Expr: - """Set a default value for the case statement.""" - return Expr(self.case_builder.otherwise(else_expr.expr)) - - def end(self) -> Expr: - """Finish building a case statement. - - Any non-matching cases will end in a `null` value. 
- """ - return Expr(self.case_builder.end()) - - -class SortExpr: - """Used to specify sorting on either a DataFrame or function.""" - - def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: - """This constructor should not be called by the end user.""" - self.raw_sort = expr_internal.SortExpr(expr.expr, ascending, nulls_first) - - def expr(self) -> Expr: - """Return the raw expr backing the SortExpr.""" - return Expr(self.raw_sort.expr()) - - def ascending(self) -> bool: - """Return ascending property.""" - return self.raw_sort.ascending() - - def nulls_first(self) -> bool: - """Return nulls_first property.""" - return self.raw_sort.nulls_first() - - def __repr__(self) -> str: - """Generate a string representation of this expression.""" - return self.raw_sort.__repr__() - - -SortKey = Expr | SortExpr | str diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py deleted file mode 100644 index fd116254b..000000000 --- a/python/datafusion/functions.py +++ /dev/null @@ -1,3091 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""User functions for operating on :py:class:`~datafusion.expr.Expr`.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -import pyarrow as pa - -from datafusion._internal import functions as f -from datafusion.common import NullTreatment -from datafusion.expr import ( - CaseBuilder, - Expr, - SortExpr, - SortKey, - WindowFrame, - expr_list_to_raw_expr_list, - sort_list_to_raw_sort_list, - sort_or_default, -) - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - -if TYPE_CHECKING: - from datafusion.context import SessionContext -__all__ = [ - "abs", - "acos", - "acosh", - "alias", - "approx_distinct", - "approx_median", - "approx_percentile_cont", - "approx_percentile_cont_with_weight", - "array", - "array_agg", - "array_append", - "array_cat", - "array_concat", - "array_dims", - "array_distinct", - "array_element", - "array_empty", - "array_except", - "array_extract", - "array_has", - "array_has_all", - "array_has_any", - "array_indexof", - "array_intersect", - "array_join", - "array_length", - "array_ndims", - "array_pop_back", - "array_pop_front", - "array_position", - "array_positions", - "array_prepend", - "array_push_back", - "array_push_front", - "array_remove", - "array_remove_all", - "array_remove_n", - "array_repeat", - "array_replace", - "array_replace_all", - "array_replace_n", - "array_resize", - "array_slice", - "array_sort", - "array_to_string", - "array_union", - "arrow_cast", - "arrow_typeof", - "ascii", - "asin", - "asinh", - "atan", - "atan2", - "atanh", - "avg", - "bit_and", - "bit_length", - "bit_or", - "bit_xor", - "bool_and", - "bool_or", - "btrim", - "cardinality", - "case", - "cbrt", - "ceil", - "char_length", - "character_length", - "chr", - "coalesce", - "col", - "concat", - "concat_ws", - "corr", - "cos", - "cosh", - "cot", - "count", - "count_star", - "covar", - "covar_pop", - "covar_samp", - "cume_dist", - 
"current_date", - "current_time", - "date_bin", - "date_part", - "date_trunc", - "datepart", - "datetrunc", - "decode", - "degrees", - "dense_rank", - "digest", - "empty", - "encode", - "ends_with", - "exp", - "extract", - "factorial", - "find_in_set", - "first_value", - "flatten", - "floor", - "from_unixtime", - "gcd", - "in_list", - "initcap", - "isnan", - "iszero", - "lag", - "last_value", - "lcm", - "lead", - "left", - "length", - "levenshtein", - "list_append", - "list_cat", - "list_concat", - "list_dims", - "list_distinct", - "list_element", - "list_except", - "list_extract", - "list_indexof", - "list_intersect", - "list_join", - "list_length", - "list_ndims", - "list_position", - "list_positions", - "list_prepend", - "list_push_back", - "list_push_front", - "list_remove", - "list_remove_all", - "list_remove_n", - "list_repeat", - "list_replace", - "list_replace_all", - "list_replace_n", - "list_resize", - "list_slice", - "list_sort", - "list_to_string", - "list_union", - "ln", - "log", - "log2", - "log10", - "lower", - "lpad", - "ltrim", - "make_array", - "make_date", - "make_list", - "max", - "md5", - "mean", - "median", - "min", - "named_struct", - "nanvl", - "now", - "nth_value", - "ntile", - "nullif", - "nvl", - "octet_length", - "order_by", - "overlay", - "percent_rank", - "pi", - "pow", - "power", - "radians", - "random", - "range", - "rank", - "regexp_count", - "regexp_instr", - "regexp_like", - "regexp_match", - "regexp_replace", - "regr_avgx", - "regr_avgy", - "regr_count", - "regr_intercept", - "regr_r2", - "regr_slope", - "regr_sxx", - "regr_sxy", - "regr_syy", - "repeat", - "replace", - "reverse", - "right", - "round", - "row_number", - "rpad", - "rtrim", - "sha224", - "sha256", - "sha384", - "sha512", - "signum", - "sin", - "sinh", - "split_part", - "sqrt", - "starts_with", - "stddev", - "stddev_pop", - "stddev_samp", - "string_agg", - "strpos", - "struct", - "substr", - "substr_index", - "substring", - "sum", - "tan", - "tanh", - "to_char", - 
"to_date", - "to_hex", - "to_local_time", - "to_time", - "to_timestamp", - "to_timestamp_micros", - "to_timestamp_millis", - "to_timestamp_nanos", - "to_timestamp_seconds", - "to_unixtime", - "today", - "translate", - "trim", - "trunc", - "upper", - "uuid", - "var", - "var_pop", - "var_samp", - "var_sample", - "when", - # Window Functions - "window", -] - - -def isnan(expr: Expr) -> Expr: - """Returns true if a given number is +NaN or -NaN otherwise returns false.""" - return Expr(f.isnan(expr.expr)) - - -def nullif(expr1: Expr, expr2: Expr) -> Expr: - """Returns NULL if expr1 equals expr2; otherwise it returns expr1. - - This can be used to perform the inverse operation of the COALESCE expression. - """ - return Expr(f.nullif(expr1.expr, expr2.expr)) - - -def encode(expr: Expr, encoding: Expr) -> Expr: - """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" - return Expr(f.encode(expr.expr, encoding.expr)) - - -def decode(expr: Expr, encoding: Expr) -> Expr: - """Decode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" - return Expr(f.decode(expr.expr, encoding.expr)) - - -def array_to_string(expr: Expr, delimiter: Expr) -> Expr: - """Converts each element to its text representation.""" - return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) - - -def array_join(expr: Expr, delimiter: Expr) -> Expr: - """Converts each element to its text representation. - - This is an alias for :py:func:`array_to_string`. - """ - return array_to_string(expr, delimiter) - - -def list_to_string(expr: Expr, delimiter: Expr) -> Expr: - """Converts each element to its text representation. - - This is an alias for :py:func:`array_to_string`. - """ - return array_to_string(expr, delimiter) - - -def list_join(expr: Expr, delimiter: Expr) -> Expr: - """Converts each element to its text representation. - - This is an alias for :py:func:`array_to_string`. 
- """ - return array_to_string(expr, delimiter) - - -def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: - """Returns whether the argument is contained within the list ``values``.""" - values = [v.expr for v in values] - return Expr(f.in_list(arg.expr, values, negated)) - - -def digest(value: Expr, method: Expr) -> Expr: - """Computes the binary hash of an expression using the specified algorithm. - - Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, - blake2b, and blake3. - """ - return Expr(f.digest(value.expr, method.expr)) - - -def concat(*args: Expr) -> Expr: - """Concatenates the text representations of all the arguments. - - NULL arguments are ignored. - """ - args = [arg.expr for arg in args] - return Expr(f.concat(args)) - - -def concat_ws(separator: str, *args: Expr) -> Expr: - """Concatenates the list ``args`` with the separator. - - ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. - """ - args = [arg.expr for arg in args] - return Expr(f.concat_ws(separator, args)) - - -def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: - """Creates a new sort expression.""" - return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) - - -def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr: - """Creates an alias expression with an optional metadata dictionary. - - Args: - expr: The expression to alias - name: The alias name - metadata: Optional metadata to attach to the column - - Returns: - An expression with the given alias - """ - return Expr(f.alias(expr.expr, name, metadata)) - - -def col(name: str) -> Expr: - """Creates a column reference expression.""" - return Expr(f.col(name)) - - -def count_star(filter: Expr | None = None) -> Expr: - """Create a COUNT(1) aggregate expression. - - This aggregate function will count all of the rows in the partition. 
- - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``distinct``, and ``null_treatment``. - - Args: - filter: If provided, only count rows for which the filter is True - """ - return count(Expr.literal(1), filter=filter) - - -def case(expr: Expr) -> CaseBuilder: - """Create a case expression. - - Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the - expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for - detailed usage. - """ - return CaseBuilder(f.case(expr.expr)) - - -def when(when: Expr, then: Expr) -> CaseBuilder: - """Create a case expression that has no base expression. - - Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the - expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for - detailed usage. - """ - return CaseBuilder(f.when(when.expr, then.expr)) - - -@deprecated("Prefer to call Expr.over() instead") -def window( - name: str, - args: list[Expr], - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, - window_frame: WindowFrame | None = None, - filter: Expr | None = None, - distinct: bool = False, - ctx: SessionContext | None = None, -) -> Expr: - """Creates a new Window function expression. - - This interface will soon be deprecated. Instead of using this interface, - users should call the window functions directly. 
For example, to perform a - lag use:: - - df.select(functions.lag(col("a")).partition_by(col("b")).build()) - - The ``order_by`` parameter accepts column names or expressions, e.g.:: - - window("lag", [col("a")], order_by="ts") - """ - args = [a.expr for a in args] - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - window_frame = window_frame.window_frame if window_frame is not None else None - ctx = ctx.ctx if ctx is not None else None - filter_raw = filter.expr if filter is not None else None - return Expr( - f.window( - name, - args, - partition_by=partition_by_raw, - order_by=order_by_raw, - window_frame=window_frame, - ctx=ctx, - filter=filter_raw, - distinct=distinct, - ) - ) - - -# scalar functions -def abs(arg: Expr) -> Expr: - """Return the absolute value of a given number. - - Returns: - -------- - Expr - A new expression representing the absolute value of the input expression. - """ - return Expr(f.abs(arg.expr)) - - -def acos(arg: Expr) -> Expr: - """Returns the arc cosine or inverse cosine of a number. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acos(dfn.col("a")).alias("acos")) - >>> result.collect_column("acos")[0].as_py() - 0.0 - """ - return Expr(f.acos(arg.expr)) - - -def acosh(arg: Expr) -> Expr: - """Returns inverse hyperbolic cosine. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [1.0]}) - >>> result = df.select(dfn.functions.acosh(dfn.col("a")).alias("acosh")) - >>> result.collect_column("acosh")[0].as_py() - 0.0 - """ - return Expr(f.acosh(arg.expr)) - - -def ascii(arg: Expr) -> Expr: - """Returns the numeric code of the first character of the argument.""" - return Expr(f.ascii(arg.expr)) - - -def asin(arg: Expr) -> Expr: - """Returns the arc sine or inverse sine of a number. 
- - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asin(dfn.col("a")).alias("asin")) - >>> result.collect_column("asin")[0].as_py() - 0.0 - """ - return Expr(f.asin(arg.expr)) - - -def asinh(arg: Expr) -> Expr: - """Returns inverse hyperbolic sine. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.asinh(dfn.col("a")).alias("asinh")) - >>> result.collect_column("asinh")[0].as_py() - 0.0 - """ - return Expr(f.asinh(arg.expr)) - - -def atan(arg: Expr) -> Expr: - """Returns inverse tangent of a number. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atan(dfn.col("a")).alias("atan")) - >>> result.collect_column("atan")[0].as_py() - 0.0 - """ - return Expr(f.atan(arg.expr)) - - -def atanh(arg: Expr) -> Expr: - """Returns inverse hyperbolic tangent. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.atanh(dfn.col("a")).alias("atanh")) - >>> result.collect_column("atanh")[0].as_py() - 0.0 - """ - return Expr(f.atanh(arg.expr)) - - -def atan2(y: Expr, x: Expr) -> Expr: - """Returns inverse tangent of a division given in the argument. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"y": [0.0], "x": [1.0]}) - >>> result = df.select( - ... 
dfn.functions.atan2(dfn.col("y"), dfn.col("x")).alias("atan2")) - >>> result.collect_column("atan2")[0].as_py() - 0.0 - """ - return Expr(f.atan2(y.expr, x.expr)) - - -def bit_length(arg: Expr) -> Expr: - """Returns the number of bits in the string argument.""" - return Expr(f.bit_length(arg.expr)) - - -def btrim(arg: Expr) -> Expr: - """Removes all characters, spaces by default, from both sides of a string.""" - return Expr(f.btrim(arg.expr)) - - -def cbrt(arg: Expr) -> Expr: - """Returns the cube root of a number.""" - return Expr(f.cbrt(arg.expr)) - - -def ceil(arg: Expr) -> Expr: - """Returns the nearest integer greater than or equal to argument.""" - return Expr(f.ceil(arg.expr)) - - -def character_length(arg: Expr) -> Expr: - """Returns the number of characters in the argument.""" - return Expr(f.character_length(arg.expr)) - - -def length(string: Expr) -> Expr: - """The number of characters in the ``string``.""" - return Expr(f.length(string.expr)) - - -def char_length(string: Expr) -> Expr: - """The number of characters in the ``string``.""" - return Expr(f.char_length(string.expr)) - - -def chr(arg: Expr) -> Expr: - """Converts the Unicode code point to a UTF8 character.""" - return Expr(f.chr(arg.expr)) - - -def coalesce(*args: Expr) -> Expr: - """Returns the value of the first expr in ``args`` which is not NULL.""" - args = [arg.expr for arg in args] - return Expr(f.coalesce(*args)) - - -def cos(arg: Expr) -> Expr: - """Returns the cosine of the argument. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cos_df = df.select(dfn.functions.cos(dfn.col("a")).alias("cos")) - >>> cos_df.collect_column("cos")[0].as_py() - 1.0 - """ - return Expr(f.cos(arg.expr)) - - -def cosh(arg: Expr) -> Expr: - """Returns the hyperbolic cosine of the argument. 
- - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,-1,1]}) - >>> cosh_df = df.select(dfn.functions.cosh(dfn.col("a")).alias("cosh")) - >>> cosh_df.collect_column("cosh")[0].as_py() - 1.0 - """ - return Expr(f.cosh(arg.expr)) - - -def cot(arg: Expr) -> Expr: - """Returns the cotangent of the argument. - - Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [pi / 4]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.cot(dfn.col("a")).alias("cot") - ... ) - >>> builtins.round( - ... result.collect_column("cot")[0].as_py(), 1 - ... ) - 1.0 - """ - return Expr(f.cot(arg.expr)) - - -def degrees(arg: Expr) -> Expr: - """Converts the argument from radians to degrees. - - Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0,pi,2*pi]}) - >>> deg_df = df.select(dfn.functions.degrees(dfn.col("a")).alias("deg")) - >>> deg_df.collect_column("deg")[2].as_py() - 360.0 - """ - return Expr(f.degrees(arg.expr)) - - -def ends_with(arg: Expr, suffix: Expr) -> Expr: - """Returns true if the ``string`` ends with the ``suffix``, false otherwise.""" - return Expr(f.ends_with(arg.expr, suffix.expr)) - - -def exp(arg: Expr) -> Expr: - """Returns the exponential of the argument.""" - return Expr(f.exp(arg.expr)) - - -def factorial(arg: Expr) -> Expr: - """Returns the factorial of the argument.""" - return Expr(f.factorial(arg.expr)) - - -def find_in_set(string: Expr, string_list: Expr) -> Expr: - """Find a string in a list of strings. - - Returns a value in the range of 1 to N if the string is in the string list - ``string_list`` consisting of N substrings. - - The string list is a string composed of substrings separated by ``,`` characters. 
- """ - return Expr(f.find_in_set(string.expr, string_list.expr)) - - -def floor(arg: Expr) -> Expr: - """Returns the nearest integer less than or equal to the argument.""" - return Expr(f.floor(arg.expr)) - - -def gcd(x: Expr, y: Expr) -> Expr: - """Returns the greatest common divisor.""" - return Expr(f.gcd(x.expr, y.expr)) - - -def initcap(string: Expr) -> Expr: - """Set the initial letter of each word to capital. - - Converts the first letter of each word in ``string`` to uppercase and the remaining - characters to lowercase. - """ - return Expr(f.initcap(string.expr)) - - -def instr(string: Expr, substring: Expr) -> Expr: - """Finds the position from where the ``substring`` matches the ``string``. - - This is an alias for :py:func:`strpos`. - """ - return strpos(string, substring) - - -def iszero(arg: Expr) -> Expr: - """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" - return Expr(f.iszero(arg.expr)) - - -def lcm(x: Expr, y: Expr) -> Expr: - """Returns the least common multiple.""" - return Expr(f.lcm(x.expr, y.expr)) - - -def left(string: Expr, n: Expr) -> Expr: - """Returns the first ``n`` characters in the ``string``.""" - return Expr(f.left(string.expr, n.expr)) - - -def levenshtein(string1: Expr, string2: Expr) -> Expr: - """Returns the Levenshtein distance between the two given strings.""" - return Expr(f.levenshtein(string1.expr, string2.expr)) - - -def ln(arg: Expr) -> Expr: - """Returns the natural logarithm (base e) of the argument.""" - return Expr(f.ln(arg.expr)) - - -def log(base: Expr, num: Expr) -> Expr: - """Returns the logarithm of a number for a particular ``base``.""" - return Expr(f.log(base.expr, num.expr)) - - -def log10(arg: Expr) -> Expr: - """Base 10 logarithm of the argument.""" - return Expr(f.log10(arg.expr)) - - -def log2(arg: Expr) -> Expr: - """Base 2 logarithm of the argument.""" - return Expr(f.log2(arg.expr)) - - -def lower(arg: Expr) -> Expr: - """Converts a string to lowercase.""" - return 
Expr(f.lower(arg.expr)) - - -def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr: - """Add left padding to a string. - - Extends the string to length length by prepending the characters fill (a - space by default). If the string is already longer than length then it is - truncated (on the right). - """ - characters = characters if characters is not None else Expr.literal(" ") - return Expr(f.lpad(string.expr, count.expr, characters.expr)) - - -def ltrim(arg: Expr) -> Expr: - """Removes all characters, spaces by default, from the beginning of a string.""" - return Expr(f.ltrim(arg.expr)) - - -def md5(arg: Expr) -> Expr: - """Computes an MD5 128-bit checksum for a string expression.""" - return Expr(f.md5(arg.expr)) - - -def nanvl(x: Expr, y: Expr) -> Expr: - """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.""" - return Expr(f.nanvl(x.expr, y.expr)) - - -def nvl(x: Expr, y: Expr) -> Expr: - """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``.""" - return Expr(f.nvl(x.expr, y.expr)) - - -def octet_length(arg: Expr) -> Expr: - """Returns the number of bytes of a string.""" - return Expr(f.octet_length(arg.expr)) - - -def overlay( - string: Expr, substring: Expr, start: Expr, length: Expr | None = None -) -> Expr: - """Replace a substring with a new substring. - - Replace the substring of string that starts at the ``start``'th character and - extends for ``length`` characters with new substring. - """ - if length is None: - return Expr(f.overlay(string.expr, substring.expr, start.expr)) - return Expr(f.overlay(string.expr, substring.expr, start.expr, length.expr)) - - -def pi() -> Expr: - """Returns an approximate value of π.""" - return Expr(f.pi()) - - -def position(string: Expr, substring: Expr) -> Expr: - """Finds the position from where the ``substring`` matches the ``string``. - - This is an alias for :py:func:`strpos`. 
- """ - return strpos(string, substring) - - -def power(base: Expr, exponent: Expr) -> Expr: - """Returns ``base`` raised to the power of ``exponent``.""" - return Expr(f.power(base.expr, exponent.expr)) - - -def pow(base: Expr, exponent: Expr) -> Expr: - """Returns ``base`` raised to the power of ``exponent``. - - This is an alias of :py:func:`power`. - """ - return power(base, exponent) - - -def radians(arg: Expr) -> Expr: - """Converts the argument from degrees to radians. - - Examples: - --------- - >>> from math import pi - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [180.0]}) - >>> import builtins - >>> result = df.select( - ... dfn.functions.radians(dfn.col("a")).alias("rad") - ... ) - >>> builtins.round( - ... result.collect_column("rad")[0].as_py(), 6 - ... ) - 3.141593 - """ - return Expr(f.radians(arg.expr)) - - -def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: - """Find if any regular expression (regex) matches exist. - - Tests a string using a regular expression returning true if at least one match, - false otherwise. - """ - if flags is not None: - flags = flags.expr - return Expr(f.regexp_like(string.expr, regex.expr, flags)) - - -def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: - """Perform regular expression (regex) matching. - - Returns an array with each element containing the leftmost-first match of the - corresponding index in ``regex`` to string in ``string``. - """ - if flags is not None: - flags = flags.expr - return Expr(f.regexp_match(string.expr, regex.expr, flags)) - - -def regexp_replace( - string: Expr, pattern: Expr, replacement: Expr, flags: Expr | None = None -) -> Expr: - """Replaces substring(s) matching a PCRE-like regular expression. 
- - The full list of supported features and syntax can be found at - - - Supported flags with the addition of 'g' can be found at - - """ - if flags is not None: - flags = flags.expr - return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) - - -def regexp_count( - string: Expr, pattern: Expr, start: Expr | None = None, flags: Expr | None = None -) -> Expr: - """Returns the number of matches in a string. - - Optional start position (the first position is 1) to search for the regular - expression. - """ - if flags is not None: - flags = flags.expr - start = start.expr if start is not None else start - return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) - - -def regexp_instr( - values: Expr, - regex: Expr, - start: Expr | None = None, - n: Expr | None = None, - flags: Expr | None = None, - sub_expr: Expr | None = None, -) -> Expr: - """Returns the position of a regular expression match in a string. - - Searches ``values`` for the ``n``-th occurrence of ``regex``, starting at position - ``start`` (the first position is 1). Returns the starting or ending position based - on ``end_position``. Use ``flags`` to control regex behavior and ``sub_expr`` to - return the position of a specific capture group instead of the entire match. 
- """ - start = start.expr if start is not None else None - n = n.expr if n is not None else None - flags = flags.expr if flags is not None else None - sub_expr = sub_expr.expr if sub_expr is not None else None - - return Expr( - f.regexp_instr( - values.expr, - regex.expr, - start, - n, - flags, - sub_expr, - ) - ) - - -def repeat(string: Expr, n: Expr) -> Expr: - """Repeats the ``string`` to ``n`` times.""" - return Expr(f.repeat(string.expr, n.expr)) - - -def replace(string: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``.""" - return Expr(f.replace(string.expr, from_val.expr, to_val.expr)) - - -def reverse(arg: Expr) -> Expr: - """Reverse the string argument.""" - return Expr(f.reverse(arg.expr)) - - -def right(string: Expr, n: Expr) -> Expr: - """Returns the last ``n`` characters in the ``string``.""" - return Expr(f.right(string.expr, n.expr)) - - -def round(value: Expr, decimal_places: Expr | None = None) -> Expr: - """Round the argument to the nearest integer. - - If the optional ``decimal_places`` is specified, round to the nearest number of - decimal places. You can specify a negative number of decimal places. For example - ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. - """ - if decimal_places is None: - decimal_places = Expr.literal(0) - return Expr(f.round(value.expr, decimal_places.expr)) - - -def rpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr: - """Add right padding to a string. - - Extends the string to length length by appending the characters fill (a space - by default). If the string is already longer than length then it is truncated. 
- """ - characters = characters if characters is not None else Expr.literal(" ") - return Expr(f.rpad(string.expr, count.expr, characters.expr)) - - -def rtrim(arg: Expr) -> Expr: - """Removes all characters, spaces by default, from the end of a string.""" - return Expr(f.rtrim(arg.expr)) - - -def sha224(arg: Expr) -> Expr: - """Computes the SHA-224 hash of a binary string.""" - return Expr(f.sha224(arg.expr)) - - -def sha256(arg: Expr) -> Expr: - """Computes the SHA-256 hash of a binary string.""" - return Expr(f.sha256(arg.expr)) - - -def sha384(arg: Expr) -> Expr: - """Computes the SHA-384 hash of a binary string.""" - return Expr(f.sha384(arg.expr)) - - -def sha512(arg: Expr) -> Expr: - """Computes the SHA-512 hash of a binary string.""" - return Expr(f.sha512(arg.expr)) - - -def signum(arg: Expr) -> Expr: - """Returns the sign of the argument (-1, 0, +1).""" - return Expr(f.signum(arg.expr)) - - -def sin(arg: Expr) -> Expr: - """Returns the sine of the argument. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sin(dfn.col("a")).alias("sin")) - >>> result.collect_column("sin")[0].as_py() - 0.0 - """ - return Expr(f.sin(arg.expr)) - - -def sinh(arg: Expr) -> Expr: - """Returns the hyperbolic sine of the argument. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.sinh(dfn.col("a")).alias("sinh")) - >>> result.collect_column("sinh")[0].as_py() - 0.0 - """ - return Expr(f.sinh(arg.expr)) - - -def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: - """Split a string and return one part. - - Splits a string based on a delimiter and picks out the desired field based - on the index. 
- """ - return Expr(f.split_part(string.expr, delimiter.expr, index.expr)) - - -def sqrt(arg: Expr) -> Expr: - """Returns the square root of the argument.""" - return Expr(f.sqrt(arg.expr)) - - -def starts_with(string: Expr, prefix: Expr) -> Expr: - """Returns true if string starts with prefix.""" - return Expr(f.starts_with(string.expr, prefix.expr)) - - -def strpos(string: Expr, substring: Expr) -> Expr: - """Finds the position from where the ``substring`` matches the ``string``.""" - return Expr(f.strpos(string.expr, substring.expr)) - - -def substr(string: Expr, position: Expr) -> Expr: - """Substring from the ``position`` to the end.""" - return Expr(f.substr(string.expr, position.expr)) - - -def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr: - """Returns an indexed substring. - - The return will be the ``string`` from before ``count`` occurrences of - ``delimiter``. - """ - return Expr(f.substr_index(string.expr, delimiter.expr, count.expr)) - - -def substring(string: Expr, position: Expr, length: Expr) -> Expr: - """Substring from the ``position`` with ``length`` characters.""" - return Expr(f.substring(string.expr, position.expr, length.expr)) - - -def tan(arg: Expr) -> Expr: - """Returns the tangent of the argument. - - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tan(dfn.col("a")).alias("tan")) - >>> result.collect_column("tan")[0].as_py() - 0.0 - """ - return Expr(f.tan(arg.expr)) - - -def tanh(arg: Expr) -> Expr: - """Returns the hyperbolic tangent of the argument. 
- - Examples: - --------- - >>> ctx = dfn.SessionContext() - >>> df = ctx.from_pydict({"a": [0.0]}) - >>> result = df.select(dfn.functions.tanh(dfn.col("a")).alias("tanh")) - >>> result.collect_column("tanh")[0].as_py() - 0.0 - """ - return Expr(f.tanh(arg.expr)) - - -def to_hex(arg: Expr) -> Expr: - """Converts an integer to a hexadecimal string.""" - return Expr(f.to_hex(arg.expr)) - - -def now() -> Expr: - """Returns the current timestamp in nanoseconds. - - This will use the same value for all instances of now() in same statement. - """ - return Expr(f.now()) - - -def to_char(arg: Expr, formatter: Expr) -> Expr: - """Returns a string representation of a date, time, timestamp or duration. - - For usage of ``formatter`` see the rust chrono package ``strftime`` package. - - [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) - """ - return Expr(f.to_char(arg.expr, formatter.expr)) - - -def _unwrap_exprs(args: tuple[Expr, ...]) -> list: - return [arg.expr for arg in args] - - -def to_date(arg: Expr, *formatters: Expr) -> Expr: - """Converts a value to a date (YYYY-MM-DD). - - Supports strings, numeric and timestamp types as input. - Integers and doubles are interpreted as days since the unix epoch. - Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') - if ``formatters`` are not provided. - - For usage of ``formatters`` see the rust chrono package ``strftime`` package. - - [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) - """ - return Expr(f.to_date(arg.expr, *_unwrap_exprs(formatters))) - - -def to_local_time(*args: Expr) -> Expr: - """Converts a timestamp with a timezone to a timestamp without a timezone. - - This function handles daylight saving time changes. - """ - return Expr(f.to_local_time(*_unwrap_exprs(args))) - - -def to_time(arg: Expr, *formatters: Expr) -> Expr: - """Converts a value to a time. Supports strings and timestamps as input. 
- - If ``formatters`` is not provided strings are parsed as HH:MM:SS, HH:MM or - HH:MM:SS.nnnnnnnnn; - - For usage of ``formatters`` see the rust chrono package ``strftime`` package. - - [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) - """ - return Expr(f.to_time(arg.expr, *_unwrap_exprs(formatters))) - - -def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. - - For usage of ``formatters`` see the rust chrono package ``strftime`` package. - - [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) - """ - return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters))) - - -def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in milliseconds. - - See :py:func:`to_timestamp` for a description on how to use formatters. - """ - return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters))) - - -def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in microseconds. - - See :py:func:`to_timestamp` for a description on how to use formatters. - """ - return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters))) - - -def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. - - See :py:func:`to_timestamp` for a description on how to use formatters. - """ - return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters))) - - -def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: - """Converts a string and optional formats to a ``Timestamp`` in seconds. - - See :py:func:`to_timestamp` for a description on how to use formatters. 
- """ - return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters))) - - -def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: - """Converts a string and optional formats to a Unixtime.""" - return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments))) - - -def current_date() -> Expr: - """Returns current UTC date as a Date32 value.""" - return Expr(f.current_date()) - - -today = current_date - - -def current_time() -> Expr: - """Returns current UTC time as a Time64 value.""" - return Expr(f.current_time()) - - -def datepart(part: Expr, date: Expr) -> Expr: - """Return a specified part of a date. - - This is an alias for :py:func:`date_part`. - """ - return date_part(part, date) - - -def date_part(part: Expr, date: Expr) -> Expr: - """Extracts a subfield from the date.""" - return Expr(f.date_part(part.expr, date.expr)) - - -def extract(part: Expr, date: Expr) -> Expr: - """Extracts a subfield from the date. - - This is an alias for :py:func:`date_part`. - """ - return date_part(part, date) - - -def date_trunc(part: Expr, date: Expr) -> Expr: - """Truncates the date to a specified level of precision.""" - return Expr(f.date_trunc(part.expr, date.expr)) - - -def datetrunc(part: Expr, date: Expr) -> Expr: - """Truncates the date to a specified level of precision. - - This is an alias for :py:func:`date_trunc`. 
- """ - return date_trunc(part, date) - - -def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr: - """Coerces an arbitrary timestamp to the start of the nearest specified interval.""" - return Expr(f.date_bin(stride.expr, source.expr, origin.expr)) - - -def make_date(year: Expr, month: Expr, day: Expr) -> Expr: - """Make a date from year, month and day component parts.""" - return Expr(f.make_date(year.expr, month.expr, day.expr)) - - -def translate(string: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces the characters in ``from_val`` with the counterpart in ``to_val``.""" - return Expr(f.translate(string.expr, from_val.expr, to_val.expr)) - - -def trim(arg: Expr) -> Expr: - """Removes all characters, spaces by default, from both sides of a string.""" - return Expr(f.trim(arg.expr)) - - -def trunc(num: Expr, precision: Expr | None = None) -> Expr: - """Truncate the number toward zero with optional precision.""" - if precision is not None: - return Expr(f.trunc(num.expr, precision.expr)) - return Expr(f.trunc(num.expr)) - - -def upper(arg: Expr) -> Expr: - """Converts a string to uppercase.""" - return Expr(f.upper(arg.expr)) - - -def make_array(*args: Expr) -> Expr: - """Returns an array using the specified input expressions.""" - args = [arg.expr for arg in args] - return Expr(f.make_array(args)) - - -def make_list(*args: Expr) -> Expr: - """Returns an array using the specified input expressions. - - This is an alias for :py:func:`make_array`. - """ - return make_array(*args) - - -def array(*args: Expr) -> Expr: - """Returns an array using the specified input expressions. - - This is an alias for :py:func:`make_array`. 
- """ - return make_array(*args) - - -def range(start: Expr, stop: Expr, step: Expr) -> Expr: - """Create a list of values in the range between start and stop.""" - return Expr(f.range(start.expr, stop.expr, step.expr)) - - -def uuid() -> Expr: - """Returns uuid v4 as a string value.""" - return Expr(f.uuid()) - - -def struct(*args: Expr) -> Expr: - """Returns a struct with the given arguments.""" - args = [arg.expr for arg in args] - return Expr(f.struct(*args)) - - -def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: - """Returns a struct with the given names and arguments pairs.""" - name_pair_exprs = [ - [Expr.literal(pa.scalar(pair[0], type=pa.string())), pair[1]] - for pair in name_pairs - ] - - # flatten - name_pairs = [x.expr for xs in name_pair_exprs for x in xs] - return Expr(f.named_struct(*name_pairs)) - - -def from_unixtime(arg: Expr) -> Expr: - """Converts an integer to RFC3339 timestamp format string.""" - return Expr(f.from_unixtime(arg.expr)) - - -def arrow_typeof(arg: Expr) -> Expr: - """Returns the Arrow type of the expression.""" - return Expr(f.arrow_typeof(arg.expr)) - - -def arrow_cast(expr: Expr, data_type: Expr) -> Expr: - """Casts an expression to a specified data type.""" - return Expr(f.arrow_cast(expr.expr, data_type.expr)) - - -def random() -> Expr: - """Returns a random value in the range ``0.0 <= x < 1.0``.""" - return Expr(f.random()) - - -def array_append(array: Expr, element: Expr) -> Expr: - """Appends an element to the end of an array.""" - return Expr(f.array_append(array.expr, element.expr)) - - -def array_push_back(array: Expr, element: Expr) -> Expr: - """Appends an element to the end of an array. - - This is an alias for :py:func:`array_append`. - """ - return array_append(array, element) - - -def list_append(array: Expr, element: Expr) -> Expr: - """Appends an element to the end of an array. - - This is an alias for :py:func:`array_append`. 
- """ - return array_append(array, element) - - -def list_push_back(array: Expr, element: Expr) -> Expr: - """Appends an element to the end of an array. - - This is an alias for :py:func:`array_append`. - """ - return array_append(array, element) - - -def array_concat(*args: Expr) -> Expr: - """Concatenates the input arrays.""" - args = [arg.expr for arg in args] - return Expr(f.array_concat(args)) - - -def array_cat(*args: Expr) -> Expr: - """Concatenates the input arrays. - - This is an alias for :py:func:`array_concat`. - """ - return array_concat(*args) - - -def array_dims(array: Expr) -> Expr: - """Returns an array of the array's dimensions.""" - return Expr(f.array_dims(array.expr)) - - -def array_distinct(array: Expr) -> Expr: - """Returns distinct values from the array after removing duplicates.""" - return Expr(f.array_distinct(array.expr)) - - -def list_cat(*args: Expr) -> Expr: - """Concatenates the input arrays. - - This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. - """ - return array_concat(*args) - - -def list_concat(*args: Expr) -> Expr: - """Concatenates the input arrays. - - This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. - """ - return array_concat(*args) - - -def list_distinct(array: Expr) -> Expr: - """Returns distinct values from the array after removing duplicates. - - This is an alias for :py:func:`array_distinct`. - """ - return array_distinct(array) - - -def list_dims(array: Expr) -> Expr: - """Returns an array of the array's dimensions. - - This is an alias for :py:func:`array_dims`. 
- """ - return array_dims(array) - - -def array_element(array: Expr, n: Expr) -> Expr: - """Extracts the element with the index n from the array.""" - return Expr(f.array_element(array.expr, n.expr)) - - -def array_empty(array: Expr) -> Expr: - """Returns a boolean indicating whether the array is empty.""" - return Expr(f.array_empty(array.expr)) - - -def array_extract(array: Expr, n: Expr) -> Expr: - """Extracts the element with the index n from the array. - - This is an alias for :py:func:`array_element`. - """ - return array_element(array, n) - - -def list_element(array: Expr, n: Expr) -> Expr: - """Extracts the element with the index n from the array. - - This is an alias for :py:func:`array_element`. - """ - return array_element(array, n) - - -def list_extract(array: Expr, n: Expr) -> Expr: - """Extracts the element with the index n from the array. - - This is an alias for :py:func:`array_element`. - """ - return array_element(array, n) - - -def array_length(array: Expr) -> Expr: - """Returns the length of the array.""" - return Expr(f.array_length(array.expr)) - - -def list_length(array: Expr) -> Expr: - """Returns the length of the array. - - This is an alias for :py:func:`array_length`. - """ - return array_length(array) - - -def array_has(first_array: Expr, second_array: Expr) -> Expr: - """Returns true if the element appears in the first array, otherwise false.""" - return Expr(f.array_has(first_array.expr, second_array.expr)) - - -def array_has_all(first_array: Expr, second_array: Expr) -> Expr: - """Determines if there is complete overlap ``second_array`` in ``first_array``. - - Returns true if each element of the second array appears in the first array. - Otherwise, it returns false. - """ - return Expr(f.array_has_all(first_array.expr, second_array.expr)) - - -def array_has_any(first_array: Expr, second_array: Expr) -> Expr: - """Determine if there is an overlap between ``first_array`` and ``second_array``. 
- - Returns true if at least one element of the second array appears in the first - array. Otherwise, it returns false. - """ - return Expr(f.array_has_any(first_array.expr, second_array.expr)) - - -def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: - """Return the position of the first occurrence of ``element`` in ``array``.""" - return Expr(f.array_position(array.expr, element.expr, index)) - - -def array_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: - """Return the position of the first occurrence of ``element`` in ``array``. - - This is an alias for :py:func:`array_position`. - """ - return array_position(array, element, index) - - -def list_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: - """Return the position of the first occurrence of ``element`` in ``array``. - - This is an alias for :py:func:`array_position`. - """ - return array_position(array, element, index) - - -def list_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: - """Return the position of the first occurrence of ``element`` in ``array``. - - This is an alias for :py:func:`array_position`. - """ - return array_position(array, element, index) - - -def array_positions(array: Expr, element: Expr) -> Expr: - """Searches for an element in the array and returns all occurrences.""" - return Expr(f.array_positions(array.expr, element.expr)) - - -def list_positions(array: Expr, element: Expr) -> Expr: - """Searches for an element in the array and returns all occurrences. - - This is an alias for :py:func:`array_positions`. - """ - return array_positions(array, element) - - -def array_ndims(array: Expr) -> Expr: - """Returns the number of dimensions of the array.""" - return Expr(f.array_ndims(array.expr)) - - -def list_ndims(array: Expr) -> Expr: - """Returns the number of dimensions of the array. - - This is an alias for :py:func:`array_ndims`. 
- """ - return array_ndims(array) - - -def array_prepend(element: Expr, array: Expr) -> Expr: - """Prepends an element to the beginning of an array.""" - return Expr(f.array_prepend(element.expr, array.expr)) - - -def array_push_front(element: Expr, array: Expr) -> Expr: - """Prepends an element to the beginning of an array. - - This is an alias for :py:func:`array_prepend`. - """ - return array_prepend(element, array) - - -def list_prepend(element: Expr, array: Expr) -> Expr: - """Prepends an element to the beginning of an array. - - This is an alias for :py:func:`array_prepend`. - """ - return array_prepend(element, array) - - -def list_push_front(element: Expr, array: Expr) -> Expr: - """Prepends an element to the beginning of an array. - - This is an alias for :py:func:`array_prepend`. - """ - return array_prepend(element, array) - - -def array_pop_back(array: Expr) -> Expr: - """Returns the array without the last element.""" - return Expr(f.array_pop_back(array.expr)) - - -def array_pop_front(array: Expr) -> Expr: - """Returns the array without the first element.""" - return Expr(f.array_pop_front(array.expr)) - - -def array_remove(array: Expr, element: Expr) -> Expr: - """Removes the first element from the array equal to the given value.""" - return Expr(f.array_remove(array.expr, element.expr)) - - -def list_remove(array: Expr, element: Expr) -> Expr: - """Removes the first element from the array equal to the given value. - - This is an alias for :py:func:`array_remove`. - """ - return array_remove(array, element) - - -def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: - """Removes the first ``max`` elements from the array equal to the given value.""" - return Expr(f.array_remove_n(array.expr, element.expr, max.expr)) - - -def list_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: - """Removes the first ``max`` elements from the array equal to the given value. - - This is an alias for :py:func:`array_remove_n`. 
- """ - return array_remove_n(array, element, max) - - -def array_remove_all(array: Expr, element: Expr) -> Expr: - """Removes all elements from the array equal to the given value.""" - return Expr(f.array_remove_all(array.expr, element.expr)) - - -def list_remove_all(array: Expr, element: Expr) -> Expr: - """Removes all elements from the array equal to the given value. - - This is an alias for :py:func:`array_remove_all`. - """ - return array_remove_all(array, element) - - -def array_repeat(element: Expr, count: Expr) -> Expr: - """Returns an array containing ``element`` ``count`` times.""" - return Expr(f.array_repeat(element.expr, count.expr)) - - -def list_repeat(element: Expr, count: Expr) -> Expr: - """Returns an array containing ``element`` ``count`` times. - - This is an alias for :py:func:`array_repeat`. - """ - return array_repeat(element, count) - - -def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces the first occurrence of ``from_val`` with ``to_val``.""" - return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr)) - - -def list_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces the first occurrence of ``from_val`` with ``to_val``. - - This is an alias for :py:func:`array_replace`. - """ - return array_replace(array, from_val, to_val) - - -def array_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr: - """Replace ``n`` occurrences of ``from_val`` with ``to_val``. - - Replaces the first ``max`` occurrences of the specified element with another - specified element. - """ - return Expr(f.array_replace_n(array.expr, from_val.expr, to_val.expr, max.expr)) - - -def list_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr: - """Replace ``n`` occurrences of ``from_val`` with ``to_val``. - - Replaces the first ``max`` occurrences of the specified element with another - specified element. - - This is an alias for :py:func:`array_replace_n`. 
- """ - return array_replace_n(array, from_val, to_val, max) - - -def array_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces all occurrences of ``from_val`` with ``to_val``.""" - return Expr(f.array_replace_all(array.expr, from_val.expr, to_val.expr)) - - -def list_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: - """Replaces all occurrences of ``from_val`` with ``to_val``. - - This is an alias for :py:func:`array_replace_all`. - """ - return array_replace_all(array, from_val, to_val) - - -def array_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr: - """Sort an array. - - Args: - array: The input array to sort. - descending: If True, sorts in descending order. - null_first: If True, nulls will be returned at the beginning of the array. - """ - desc = "DESC" if descending else "ASC" - nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" - return Expr( - f.array_sort( - array.expr, - Expr.literal(pa.scalar(desc, type=pa.string())).expr, - Expr.literal(pa.scalar(nulls_first, type=pa.string())).expr, - ) - ) - - -def list_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr: - """This is an alias for :py:func:`array_sort`.""" - return array_sort(array, descending=descending, null_first=null_first) - - -def array_slice( - array: Expr, begin: Expr, end: Expr, stride: Expr | None = None -) -> Expr: - """Returns a slice of the array.""" - if stride is not None: - stride = stride.expr - return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride)) - - -def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) -> Expr: - """Returns a slice of the array. - - This is an alias for :py:func:`array_slice`. 
- """ - return array_slice(array, begin, end, stride) - - -def array_intersect(array1: Expr, array2: Expr) -> Expr: - """Returns the intersection of ``array1`` and ``array2``.""" - return Expr(f.array_intersect(array1.expr, array2.expr)) - - -def list_intersect(array1: Expr, array2: Expr) -> Expr: - """Returns an the intersection of ``array1`` and ``array2``. - - This is an alias for :py:func:`array_intersect`. - """ - return array_intersect(array1, array2) - - -def array_union(array1: Expr, array2: Expr) -> Expr: - """Returns an array of the elements in the union of array1 and array2. - - Duplicate rows will not be returned. - """ - return Expr(f.array_union(array1.expr, array2.expr)) - - -def list_union(array1: Expr, array2: Expr) -> Expr: - """Returns an array of the elements in the union of array1 and array2. - - Duplicate rows will not be returned. - - This is an alias for :py:func:`array_union`. - """ - return array_union(array1, array2) - - -def array_except(array1: Expr, array2: Expr) -> Expr: - """Returns the elements that appear in ``array1`` but not in ``array2``.""" - return Expr(f.array_except(array1.expr, array2.expr)) - - -def list_except(array1: Expr, array2: Expr) -> Expr: - """Returns the elements that appear in ``array1`` but not in the ``array2``. - - This is an alias for :py:func:`array_except`. - """ - return array_except(array1, array2) - - -def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: - """Returns an array with the specified size filled. - - If ``size`` is greater than the ``array`` length, the additional entries will - be filled with the given ``value``. - """ - return Expr(f.array_resize(array.expr, size.expr, value.expr)) - - -def list_resize(array: Expr, size: Expr, value: Expr) -> Expr: - """Returns an array with the specified size filled. - - If ``size`` is greater than the ``array`` length, the additional entries will be - filled with the given ``value``. This is an alias for :py:func:`array_resize`. 
- """ - return array_resize(array, size, value) - - -def flatten(array: Expr) -> Expr: - """Flattens an array of arrays into a single array.""" - return Expr(f.flatten(array.expr)) - - -def cardinality(array: Expr) -> Expr: - """Returns the total number of elements in the array.""" - return Expr(f.cardinality(array.expr)) - - -def empty(array: Expr) -> Expr: - """This is an alias for :py:func:`array_empty`.""" - return array_empty(array) - - -# aggregate functions -def approx_distinct( - expression: Expr, - filter: Expr | None = None, -) -> Expr: - """Returns the approximate number of distinct values. - - This aggregate function is similar to :py:func:`count` with distinct set, but it - will approximate the number of distinct entries. It may return significantly faster - than :py:func:`count` for some DataFrames. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Values to check for distinct entries - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.approx_distinct(expression.expr, filter=filter_raw)) - - -def approx_median(expression: Expr, filter: Expr | None = None) -> Expr: - """Returns the approximate median value. - - This aggregate function is similar to :py:func:`median`, but it will only - approximate the median. It may return significantly faster for some DataFrames. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by`` and ``null_treatment``, and ``distinct``. 
- - Args: - expression: Values to find the median for - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.approx_median(expression.expr, filter=filter_raw)) - - -def approx_percentile_cont( - sort_expression: Expr | SortExpr, - percentile: float, - num_centroids: int | None = None, - filter: Expr | None = None, -) -> Expr: - """Returns the value that is approximately at a given percentile of ``expr``. - - This aggregate function assumes the input values form a continuous distribution. - Suppose you have a DataFrame which consists of 100 different test scores. If you - called this function with a percentile of 0.9, it would return the value of the - test score that is above 90% of the other test scores. The returned value may be - between two of the values. - - This function uses the [t-digest](https://arxiv.org/abs/1902.04023) algorithm to - compute the percentile. You can limit the number of bins used in this algorithm by - setting the ``num_centroids`` parameter. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - sort_expression: Values for which to find the approximate percentile - percentile: This must be between 0.0 and 1.0, inclusive - num_centroids: Max bin size for the t-digest algorithm - filter: If provided, only compute against rows for which the filter is True - """ - sort_expr_raw = sort_or_default(sort_expression) - filter_raw = filter.expr if filter is not None else None - return Expr( - f.approx_percentile_cont( - sort_expr_raw, percentile, num_centroids=num_centroids, filter=filter_raw - ) - ) - - -def approx_percentile_cont_with_weight( - sort_expression: Expr | SortExpr, - weight: Expr, - percentile: float, - num_centroids: int | None = None, - filter: Expr | None = None, -) -> Expr: - """Returns the value of the weighted approximate percentile. - - This aggregate function is similar to :py:func:`approx_percentile_cont` except that - it uses the associated associated weights. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - sort_expression: Values for which to find the approximate percentile - weight: Relative weight for each of the values in ``expression`` - percentile: This must be between 0.0 and 1.0, inclusive - num_centroids: Max bin size for the t-digest algorithm - filter: If provided, only compute against rows for which the filter is True - - """ - sort_expr_raw = sort_or_default(sort_expression) - filter_raw = filter.expr if filter is not None else None - return Expr( - f.approx_percentile_cont_with_weight( - sort_expr_raw, - weight.expr, - percentile, - num_centroids=num_centroids, - filter=filter_raw, - ) - ) - - -def array_agg( - expression: Expr, - distinct: bool = False, - filter: Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Aggregate values into an array. - - Currently ``distinct`` and ``order_by`` cannot be used together. 
As a work around, - consider :py:func:`array_sort` after aggregation. - [Issue Tracker](https://github.com/apache/datafusion/issues/12371) - - If using the builder functions described in ref:`_aggregation` this function ignores - the option ``null_treatment``. - - Args: - expression: Values to combine into an array - distinct: If True, a single entry for each distinct value will be in the result - filter: If provided, only compute against rows for which the filter is True - order_by: Order the resultant array values. Accepts column names or expressions. - - For example:: - - df.aggregate([], array_agg(col("a"), order_by="b")) - """ - order_by_raw = sort_list_to_raw_sort_list(order_by) - filter_raw = filter.expr if filter is not None else None - - return Expr( - f.array_agg( - expression.expr, distinct=distinct, filter=filter_raw, order_by=order_by_raw - ) - ) - - -def avg( - expression: Expr, - filter: Expr | None = None, -) -> Expr: - """Returns the average value. - - This aggregate function expects a numeric expression and will return a float. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Values to combine into an array - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.avg(expression.expr, filter=filter_raw)) - - -def corr(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr: - """Returns the correlation coefficient between ``value1`` and ``value2``. - - This aggregate function expects both values to be numeric and will return a float. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - value_y: The dependent variable for correlation - value_x: The independent variable for correlation - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.corr(value_y.expr, value_x.expr, filter=filter_raw)) - - -def count( - expressions: Expr | list[Expr] | None = None, - distinct: bool = False, - filter: Expr | None = None, -) -> Expr: - """Returns the number of rows that match the given arguments. - - This aggregate function will count the non-null rows provided in the expression. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by`` and ``null_treatment``. - - Args: - expressions: Argument to perform bitwise calculation on - distinct: If True, a single entry for each distinct value will be in the result - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - if expressions is None: - args = [Expr.literal(1).expr] - elif isinstance(expressions, list): - args = [arg.expr for arg in expressions] - else: - args = [expressions.expr] - - return Expr(f.count(*args, distinct=distinct, filter=filter_raw)) - - -def covar_pop(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr: - """Computes the population covariance. - - This aggregate function expects both values to be numeric and will return a float. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - value_y: The dependent variable for covariance - value_x: The independent variable for covariance - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.covar_pop(value_y.expr, value_x.expr, filter=filter_raw)) - - -def covar_samp(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample covariance. - - This aggregate function expects both values to be numeric and will return a float. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - value_y: The dependent variable for covariance - value_x: The independent variable for covariance - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.covar_samp(value_y.expr, value_x.expr, filter=filter_raw)) - - -def covar(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample covariance. - - This is an alias for :py:func:`covar_samp`. - """ - return covar_samp(value_y, value_x, filter) - - -def max(expression: Expr, filter: Expr | None = None) -> Expr: - """Aggregate function that returns the maximum value of the argument. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The value to find the maximum of - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.max(expression.expr, filter=filter_raw)) - - -def mean(expression: Expr, filter: Expr | None = None) -> Expr: - """Returns the average (mean) value of the argument. - - This is an alias for :py:func:`avg`. 
- """ - return avg(expression, filter) - - -def median( - expression: Expr, distinct: bool = False, filter: Expr | None = None -) -> Expr: - """Computes the median of a set of numbers. - - This aggregate function returns the median value of the expression for the given - aggregate function. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by`` and ``null_treatment``. - - Args: - expression: The value to compute the median of - distinct: If True, a single entry for each distinct value will be in the result - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.median(expression.expr, distinct=distinct, filter=filter_raw)) - - -def min(expression: Expr, filter: Expr | None = None) -> Expr: - """Aggregate function that returns the minimum value of the argument. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The value to find the minimum of - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.min(expression.expr, filter=filter_raw)) - - -def sum( - expression: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the sum of a set of numbers. - - This aggregate function expects a numeric expression. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - expression: Values to combine into an array - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.sum(expression.expr, filter=filter_raw)) - - -def stddev(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the standard deviation of the argument. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The value to find the minimum of - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.stddev(expression.expr, filter=filter_raw)) - - -def stddev_pop(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the population standard deviation of the argument. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The value to find the minimum of - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.stddev_pop(expression.expr, filter=filter_raw)) - - -def stddev_samp(arg: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample standard deviation of the argument. - - This is an alias for :py:func:`stddev`. - """ - return stddev(arg, filter=filter) - - -def var(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample variance of the argument. - - This is an alias for :py:func:`var_samp`. - """ - return var_samp(expression, filter) - - -def var_pop(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the population variance of the argument. 
- - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The variable to compute the variance for - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.var_pop(expression.expr, filter=filter_raw)) - - -def var_samp(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample variance of the argument. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: The variable to compute the variance for - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.var_sample(expression.expr, filter=filter_raw)) - - -def var_sample(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the sample variance of the argument. - - This is an alias for :py:func:`var_samp`. - """ - return var_samp(expression, filter) - - -def regr_avgx( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the average of the independent variable ``x``. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_avgx(y.expr, x.expr, filter=filter_raw)) - - -def regr_avgy( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the average of the dependent variable ``y``. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_avgy(y.expr, x.expr, filter=filter_raw)) - - -def regr_count( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Counts the number of rows in which both expressions are not null. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_count(y.expr, x.expr, filter=filter_raw)) - - -def regr_intercept( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the intercept from the linear regression. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. 
- - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_intercept(y.expr, x.expr, filter=filter_raw)) - - -def regr_r2( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the R-squared value from linear regression. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_r2(y.expr, x.expr, filter=filter_raw)) - - -def regr_slope( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the slope from linear regression. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. 
- - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_slope(y.expr, x.expr, filter=filter_raw)) - - -def regr_sxx( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the sum of squares of the independent variable ``x``. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_sxx(y.expr, x.expr, filter=filter_raw)) - - -def regr_sxy( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the sum of products of pairs of numbers. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_sxy(y.expr, x.expr, filter=filter_raw)) - - -def regr_syy( - y: Expr, - x: Expr, - filter: Expr | None = None, -) -> Expr: - """Computes the sum of squares of the dependent variable ``y``. - - This is a linear regression aggregate function. Only non-null pairs of the inputs - are evaluated. 
- - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - y: The linear regression dependent variable - x: The linear regression independent variable - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - - return Expr(f.regr_syy(y.expr, x.expr, filter=filter_raw)) - - -def first_value( - expression: Expr, - filter: Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, - null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, -) -> Expr: - """Returns the first value in a group of values. - - This aggregate function will return the first value in the partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the option ``distinct``. - - Args: - expression: Argument to perform bitwise calculation on - filter: If provided, only compute against rows for which the filter is True - order_by: Set the ordering of the expression to evaluate. Accepts - column names or expressions. - null_treatment: Assign whether to respect or ignore null values. - - For example:: - - df.aggregate([], first_value(col("a"), order_by="ts")) - """ - order_by_raw = sort_list_to_raw_sort_list(order_by) - filter_raw = filter.expr if filter is not None else None - - return Expr( - f.first_value( - expression.expr, - filter=filter_raw, - order_by=order_by_raw, - null_treatment=null_treatment.value, - ) - ) - - -def last_value( - expression: Expr, - filter: Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, - null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, -) -> Expr: - """Returns the last value in a group of values. - - This aggregate function will return the last value in the partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the option ``distinct``. 
- - Args: - expression: Argument to perform bitwise calculation on - filter: If provided, only compute against rows for which the filter is True - order_by: Set the ordering of the expression to evaluate. Accepts - column names or expressions. - null_treatment: Assign whether to respect or ignore null values. - - For example:: - - df.aggregate([], last_value(col("a"), order_by="ts")) - """ - order_by_raw = sort_list_to_raw_sort_list(order_by) - filter_raw = filter.expr if filter is not None else None - - return Expr( - f.last_value( - expression.expr, - filter=filter_raw, - order_by=order_by_raw, - null_treatment=null_treatment.value, - ) - ) - - -def nth_value( - expression: Expr, - n: int, - filter: Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, - null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, -) -> Expr: - """Returns the n-th value in a group of values. - - This aggregate function will return the n-th value in the partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the option ``distinct``. - - Args: - expression: Argument to perform bitwise calculation on - n: Index of value to return. Starts at 1. - filter: If provided, only compute against rows for which the filter is True - order_by: Set the ordering of the expression to evaluate. Accepts - column names or expressions. - null_treatment: Assign whether to respect or ignore null values. - - For example:: - - df.aggregate([], nth_value(col("a"), 2, order_by="ts")) - """ - order_by_raw = sort_list_to_raw_sort_list(order_by) - filter_raw = filter.expr if filter is not None else None - - return Expr( - f.nth_value( - expression.expr, - n, - filter=filter_raw, - order_by=order_by_raw, - null_treatment=null_treatment.value, - ) - ) - - -def bit_and(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the bitwise AND of the argument. 
- - This aggregate function will bitwise compare every value in the input partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Argument to perform bitwise calculation on - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.bit_and(expression.expr, filter=filter_raw)) - - -def bit_or(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the bitwise OR of the argument. - - This aggregate function will bitwise compare every value in the input partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Argument to perform bitwise calculation on - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.bit_or(expression.expr, filter=filter_raw)) - - -def bit_xor( - expression: Expr, distinct: bool = False, filter: Expr | None = None -) -> Expr: - """Computes the bitwise XOR of the argument. - - This aggregate function will bitwise compare every value in the input partition. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by`` and ``null_treatment``. - - Args: - expression: Argument to perform bitwise calculation on - distinct: If True, evaluate each unique value of expression only once - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.bit_xor(expression.expr, distinct=distinct, filter=filter_raw)) - - -def bool_and(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the boolean AND of the argument. 
- - This aggregate function will compare every value in the input partition. These are - expected to be boolean values. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Argument to perform calculation on - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.bool_and(expression.expr, filter=filter_raw)) - - -def bool_or(expression: Expr, filter: Expr | None = None) -> Expr: - """Computes the boolean OR of the argument. - - This aggregate function will compare every value in the input partition. These are - expected to be boolean values. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``order_by``, ``null_treatment``, and ``distinct``. - - Args: - expression: Argument to perform calculation on - filter: If provided, only compute against rows for which the filter is True - """ - filter_raw = filter.expr if filter is not None else None - return Expr(f.bool_or(expression.expr, filter=filter_raw)) - - -def lead( - arg: Expr, - shift_offset: int = 1, - default_value: Any | None = None, - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a lead window function. - - Lead operation will return the argument that is in the next shift_offset-th row in - the partition. For example ``lead(col("b"), shift_offset=3, default_value=5)`` will - return the 3rd following value in column ``b``. At the end of the partition, where - no further values can be returned it will return the default value of 5. 
- - Here is an example of both the ``lead`` and :py:func:`datafusion.functions.lag` - functions on a simple DataFrame:: - - +--------+------+-----+ - | points | lead | lag | - +--------+------+-----+ - | 100 | 100 | | - | 100 | 50 | 100 | - | 50 | 25 | 100 | - | 25 | | 50 | - +--------+------+-----+ - - To set window function parameters use the window builder approach described in the - ref:`_window_functions` online documentation. - - Args: - arg: Value to return - shift_offset: Number of rows following the current row. - default_value: Value to return if shift_offet row does not exist. - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - lead(col("b"), order_by="ts") - """ - if not isinstance(default_value, pa.Scalar) and default_value is not None: - default_value = pa.scalar(default_value) - - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.lead( - arg.expr, - shift_offset, - default_value, - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def lag( - arg: Expr, - shift_offset: int = 1, - default_value: Any | None = None, - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a lag window function. - - Lag operation will return the argument that is in the previous shift_offset-th row - in the partition. For example ``lag(col("b"), shift_offset=3, default_value=5)`` - will return the 3rd previous value in column ``b``. At the beginning of the - partition, where no values can be returned it will return the default value of 5. 
- - Here is an example of both the ``lag`` and :py:func:`datafusion.functions.lead` - functions on a simple DataFrame:: - - +--------+------+-----+ - | points | lead | lag | - +--------+------+-----+ - | 100 | 100 | | - | 100 | 50 | 100 | - | 50 | 25 | 100 | - | 25 | | 50 | - +--------+------+-----+ - - Args: - arg: Value to return - shift_offset: Number of rows before the current row. - default_value: Value to return if shift_offet row does not exist. - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - lag(col("b"), order_by="ts") - """ - if not isinstance(default_value, pa.Scalar): - default_value = pa.scalar(default_value) - - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.lag( - arg.expr, - shift_offset, - default_value, - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def row_number( - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a row number window function. - - Returns the row number of the window function. - - Here is an example of the ``row_number`` on a simple DataFrame:: - - +--------+------------+ - | points | row number | - +--------+------------+ - | 100 | 1 | - | 100 | 2 | - | 50 | 3 | - | 25 | 4 | - +--------+------------+ - - Args: - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. 
- - For example:: - - row_number(order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.row_number( - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def rank( - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a rank window function. - - Returns the rank based upon the window order. Consecutive equal values will receive - the same rank, but the next different value will not be consecutive but rather the - number of rows that precede it plus one. This is similar to Olympic medals. If two - people tie for gold, the next place is bronze. There would be no silver medal. Here - is an example of a dataframe with a window ordered by descending ``points`` and the - associated rank. - - You should set ``order_by`` to produce meaningful results:: - - +--------+------+ - | points | rank | - +--------+------+ - | 100 | 1 | - | 100 | 1 | - | 50 | 3 | - | 25 | 4 | - +--------+------+ - - Args: - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - rank(order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.rank( - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def dense_rank( - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a dense_rank window function. - - This window function is similar to :py:func:`rank` except that the returned values - will be consecutive. 
Here is an example of a dataframe with a window ordered by - descending ``points`` and the associated dense rank:: - - +--------+------------+ - | points | dense_rank | - +--------+------------+ - | 100 | 1 | - | 100 | 1 | - | 50 | 2 | - | 25 | 3 | - +--------+------------+ - - Args: - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - dense_rank(order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.dense_rank( - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def percent_rank( - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a percent_rank window function. - - This window function is similar to :py:func:`rank` except that the returned values - are the percentage from 0.0 to 1.0 from first to last. Here is an example of a - dataframe with a window ordered by descending ``points`` and the associated percent - rank:: - - +--------+--------------+ - | points | percent_rank | - +--------+--------------+ - | 100 | 0.0 | - | 100 | 0.0 | - | 50 | 0.666667 | - | 25 | 1.0 | - +--------+--------------+ - - Args: - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - percent_rank(order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.percent_rank( - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def cume_dist( - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a cumulative distribution window function. 
- - This window function is similar to :py:func:`rank` except that the returned values - are the ratio of the row number to the total number of rows. Here is an example of a - dataframe with a window ordered by descending ``points`` and the associated - cumulative distribution:: - - +--------+-----------+ - | points | cume_dist | - +--------+-----------+ - | 100 | 0.5 | - | 100 | 0.5 | - | 50 | 0.75 | - | 25 | 1.0 | - +--------+-----------+ - - Args: - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. - - For example:: - - cume_dist(order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.cume_dist( - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def ntile( - groups: int, - partition_by: list[Expr] | Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Create a n-tile window function. - - This window function orders the window frame into a give number of groups based on - the ordering criteria. It then returns which group the current row is assigned to. - Here is an example of a dataframe with a window ordered by descending ``points`` - and the associated n-tile function:: - - +--------+-------+ - | points | ntile | - +--------+-------+ - | 120 | 1 | - | 100 | 1 | - | 80 | 2 | - | 60 | 2 | - | 40 | 3 | - | 20 | 3 | - +--------+-------+ - - Args: - groups: Number of groups for the n-tile to be divided into. - partition_by: Expressions to partition the window frame on. - order_by: Set ordering within the window frame. Accepts - column names or expressions. 
- - For example:: - - ntile(3, order_by="points") - """ - partition_by_raw = expr_list_to_raw_expr_list(partition_by) - order_by_raw = sort_list_to_raw_sort_list(order_by) - - return Expr( - f.ntile( - Expr.literal(groups).expr, - partition_by=partition_by_raw, - order_by=order_by_raw, - ) - ) - - -def string_agg( - expression: Expr, - delimiter: str, - filter: Expr | None = None, - order_by: list[SortKey] | SortKey | None = None, -) -> Expr: - """Concatenates the input strings. - - This aggregate function will concatenate input strings, ignoring null values, and - separating them with the specified delimiter. Non-string values will be converted to - their string equivalents. - - If using the builder functions described in ref:`_aggregation` this function ignores - the options ``distinct`` and ``null_treatment``. - - Args: - expression: Argument to perform bitwise calculation on - delimiter: Text to place between each value of expression - filter: If provided, only compute against rows for which the filter is True - order_by: Set the ordering of the expression to evaluate. Accepts - column names or expressions. - - For example:: - - df.aggregate([], string_agg(col("a"), ",", order_by="b")) - """ - order_by_raw = sort_list_to_raw_sort_list(order_by) - filter_raw = filter.expr if filter is not None else None - - return Expr( - f.string_agg( - expression.expr, - delimiter, - filter=filter_raw, - order_by=order_by_raw, - ) - ) diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py deleted file mode 100644 index 65eb1f042..000000000 --- a/python/datafusion/html_formatter.py +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Deprecated module for dataframe formatting.""" - -import warnings - -from datafusion.dataframe_formatter import * # noqa: F403 - -warnings.warn( - "The module 'html_formatter' is deprecated and will be removed in the next release." - "Please use 'dataframe_formatter' instead.", - DeprecationWarning, - stacklevel=3, -) diff --git a/python/datafusion/input/__init__.py b/python/datafusion/input/__init__.py deleted file mode 100644 index f0c1f42b4..000000000 --- a/python/datafusion/input/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This package provides for input sources. 
- -The primary class used within DataFusion is ``LocationInputPlugin``. -""" - -from .location import LocationInputPlugin - -__all__ = [ - "LocationInputPlugin", -] diff --git a/python/datafusion/input/base.py b/python/datafusion/input/base.py deleted file mode 100644 index f67dde2a1..000000000 --- a/python/datafusion/input/base.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module provides ``BaseInputSource``. - -A user can extend this to provide a custom input source. -""" - -from abc import ABC, abstractmethod -from typing import Any - -from datafusion.common import SqlTable - - -class BaseInputSource(ABC): - """Base Input Source class. - - If a consuming library would like to provider their own InputSource this is - the class they should extend to write their own. - - Once completed the Plugin InputSource can be registered with the - SessionContext to ensure that it will be used in order - to obtain the SqlTable information from the custom datasource. 
- """ - - @abstractmethod - def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: - """Returns `True` if the input is valid.""" - - @abstractmethod - def build_table(self, input_item: Any, table_name: str, **kwarg: Any) -> SqlTable: # type: ignore[invalid-type-form] - """Create a table from the input source.""" diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py deleted file mode 100644 index b804ac18b..000000000 --- a/python/datafusion/input/location.py +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""The default input source for DataFusion.""" - -from pathlib import Path -from typing import Any - -from datafusion.common import DataTypeMap, SqlTable -from datafusion.input.base import BaseInputSource - - -class LocationInputPlugin(BaseInputSource): - """Input Plugin for everything. - - This can be read in from a file (on disk, remote etc.). 
- """ - - def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: # noqa: ARG002 - """Returns `True` if the input is valid.""" - return isinstance(input_item, str) - - def build_table( - self, - input_item: str, - table_name: str, - **kwargs: Any, # noqa: ARG002 - ) -> SqlTable: # type: ignore[invalid-type-form] - """Create a table from the input source.""" - extension = Path(input_item).suffix - file_format = extension.lstrip(".").lower() - num_rows = 0 # Total number of rows in the file. Used for statistics - columns = [] - if file_format == "parquet": - import pyarrow.parquet as pq - - # Read the Parquet metadata - metadata = pq.read_metadata(input_item) - num_rows = metadata.num_rows - # Iterate through the schema and build the SqlTable - columns = [ - ( - col.name, - DataTypeMap.from_parquet_type_str(col.physical_type), - ) - for col in metadata.schema - ] - - elif format == "csv": - import csv - - # Consume header row and count number of rows for statistics. - # TODO: Possibly makes sense to have the eager number of rows - # calculated as a configuration since you must read the entire file - # to get that information. However, this should only be occurring - # at table creation time and therefore shouldn't - # slow down query performance. - with Path(input_item).open() as file: - reader = csv.reader(file) - _header_row = next(reader) - for _ in reader: - num_rows += 1 - # TODO: Need to actually consume this row into reasonable columns - msg = "TODO: Currently unable to support CSV input files." - raise RuntimeError(msg) - else: - msg = f"Input of format: `{format}` is currently not supported.\ - Only Parquet and CSV." - raise RuntimeError(msg) - - # Input could possibly be multiple files. 
Create a list if so - input_path = Path(input_item) - input_files = [str(p) for p in input_path.parent.glob(input_path.name)] - - return SqlTable(table_name, columns, num_rows, input_files) diff --git a/python/datafusion/io.py b/python/datafusion/io.py deleted file mode 100644 index 4f9c3c516..000000000 --- a/python/datafusion/io.py +++ /dev/null @@ -1,197 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""IO read functions using global context.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from datafusion.context import SessionContext - -if TYPE_CHECKING: - import pathlib - - import pyarrow as pa - - from datafusion.dataframe import DataFrame - from datafusion.expr import Expr - - from .options import CsvReadOptions - - -def read_parquet( - path: str | pathlib.Path, - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - parquet_pruning: bool = True, - file_extension: str = ".parquet", - skip_metadata: bool = True, - schema: pa.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, -) -> DataFrame: - """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. - - This function will use the global context. 
Any functions or tables registered - with another context may not be accessible when used with a DataFrame created - using this function. - - Args: - path: Path to the Parquet file. - table_partition_cols: Partition columns. - parquet_pruning: Whether the parquet reader should use the predicate - to prune row groups. - file_extension: File extension; only files with this extension are - selected for data input. - skip_metadata: Whether the parquet reader should skip any metadata - that may be in the file schema. This can help avoid schema - conflicts due to metadata. - schema: An optional schema representing the parquet files. If None, - the parquet reader will try to infer it based on data in the - file. - file_sort_order: Sort order for the file. - - Returns: - DataFrame representation of the read Parquet files - """ - if table_partition_cols is None: - table_partition_cols = [] - return SessionContext.global_ctx().read_parquet( - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - file_sort_order, - ) - - -def read_json( - path: str | pathlib.Path, - schema: pa.Schema | None = None, - schema_infer_max_records: int = 1000, - file_extension: str = ".json", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_compression_type: str | None = None, -) -> DataFrame: - """Read a line-delimited JSON data source. - - This function will use the global context. Any functions or tables registered - with another context may not be accessible when used with a DataFrame created - using this function. - - Args: - path: Path to the JSON file. - schema: The data source schema. - schema_infer_max_records: Maximum number of rows to read from JSON - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns. - file_compression_type: File compression type. 
- - Returns: - DataFrame representation of the read JSON files. - """ - if table_partition_cols is None: - table_partition_cols = [] - return SessionContext.global_ctx().read_json( - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) - - -def read_csv( - path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pa.Schema | None = None, - has_header: bool = True, - delimiter: str = ",", - schema_infer_max_records: int = 1000, - file_extension: str = ".csv", - table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_compression_type: str | None = None, - options: CsvReadOptions | None = None, -) -> DataFrame: - """Read a CSV data source. - - This function will use the global context. Any functions or tables registered - with another context may not be accessible when used with a DataFrame created - using this function. - - Args: - path: Path to the CSV file - schema: An optional schema representing the CSV files. If None, the - CSV reader will try to infer it based on data in file. - has_header: Whether the CSV file have a header. If schema inference - is run on a file with no headers, default column names are - created. - delimiter: An optional column delimiter. - schema_infer_max_records: Maximum number of rows to read from CSV - files for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns. - file_compression_type: File compression type. - options: Set advanced options for CSV reading. This cannot be - combined with any of the other options in this method. 
- - Returns: - DataFrame representation of the read CSV files - """ - return SessionContext.global_ctx().read_csv( - path, - schema, - has_header, - delimiter, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - options, - ) - - -def read_avro( - path: str | pathlib.Path, - schema: pa.Schema | None = None, - file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None, - file_extension: str = ".avro", -) -> DataFrame: - """Create a :py:class:`DataFrame` for reading Avro data source. - - This function will use the global context. Any functions or tables registered - with another context may not be accessible when used with a DataFrame created - using this function. - - Args: - path: Path to the Avro file. - schema: The data source schema. - file_partition_cols: Partition columns. - file_extension: File extension to select. - - Returns: - DataFrame representation of the read Avro file - """ - if file_partition_cols is None: - file_partition_cols = [] - return SessionContext.global_ctx().read_avro( - str(path), schema, file_partition_cols, file_extension - ) diff --git a/python/datafusion/object_store.py b/python/datafusion/object_store.py deleted file mode 100644 index 6298526f5..000000000 --- a/python/datafusion/object_store.py +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Object store functionality.""" - -from ._internal import object_store - -AmazonS3 = object_store.AmazonS3 -GoogleCloud = object_store.GoogleCloud -LocalFileSystem = object_store.LocalFileSystem -MicrosoftAzure = object_store.MicrosoftAzure -Http = object_store.Http - -__all__ = ["AmazonS3", "GoogleCloud", "Http", "LocalFileSystem", "MicrosoftAzure"] diff --git a/python/datafusion/options.py b/python/datafusion/options.py deleted file mode 100644 index ec19f37d0..000000000 --- a/python/datafusion/options.py +++ /dev/null @@ -1,284 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Options for reading various file formats.""" - -from __future__ import annotations - -import warnings -from typing import TYPE_CHECKING - -import pyarrow as pa - -from datafusion.expr import sort_list_to_raw_sort_list - -if TYPE_CHECKING: - from datafusion.expr import SortExpr - -from ._internal import options - -__all__ = ["CsvReadOptions"] - -DEFAULT_MAX_INFER_SCHEMA = 1000 - - -class CsvReadOptions: - """Options for reading CSV files. - - This class provides a builder pattern for configuring CSV reading options. - All methods starting with ``with_`` return ``self`` to allow method chaining. - """ - - def __init__( - self, - *, - has_header: bool = True, - delimiter: str = ",", - quote: str = '"', - terminator: str | None = None, - escape: str | None = None, - comment: str | None = None, - newlines_in_values: bool = False, - schema: pa.Schema | None = None, - schema_infer_max_records: int = DEFAULT_MAX_INFER_SCHEMA, - file_extension: str = ".csv", - table_partition_cols: list[tuple[str, pa.DataType]] | None = None, - file_compression_type: str = "", - file_sort_order: list[list[SortExpr]] | None = None, - null_regex: str | None = None, - truncated_rows: bool = False, - ) -> None: - """Initialize CsvReadOptions. - - Args: - has_header: Does the CSV file have a header row? If schema inference - is run on a file with no headers, default column names are created. - delimiter: Column delimiter character. Must be a single ASCII character. - quote: Quote character for fields containing delimiters or newlines. - Must be a single ASCII character. - terminator: Optional line terminator character. If ``None``, uses CRLF. - Must be a single ASCII character. - escape: Optional escape character for quotes. Must be a single ASCII - character. - comment: If specified, lines beginning with this character are ignored. - Must be a single ASCII character. - newlines_in_values: Whether newlines in quoted values are supported. 
- Parsing newlines in quoted values may be affected by execution - behavior such as parallel file scanning. Setting this to ``True`` - ensures that newlines in values are parsed successfully, which may - reduce performance. - schema: Optional PyArrow schema representing the CSV files. If ``None``, - the CSV reader will try to infer it based on data in the file. - schema_infer_max_records: Maximum number of rows to read from CSV files - for schema inference if needed. - file_extension: File extension; only files with this extension are - selected for data input. - table_partition_cols: Partition columns as a list of tuples of - (column_name, data_type). - file_compression_type: File compression type. Supported values are - ``"gzip"``, ``"bz2"``, ``"xz"``, ``"zstd"``, or empty string for - uncompressed. - file_sort_order: Optional sort order of the files as a list of sort - expressions per file. - null_regex: Optional regex pattern to match null values in the CSV. - truncated_rows: Whether to allow truncated rows when parsing. By default - this is ``False`` and will error if the CSV rows have different - lengths. When set to ``True``, it will allow records with less than - the expected number of columns and fill the missing columns with - nulls. If the record's schema is not nullable, it will still return - an error. 
- """ - validate_single_character("delimiter", delimiter) - validate_single_character("quote", quote) - validate_single_character("terminator", terminator) - validate_single_character("escape", escape) - validate_single_character("comment", comment) - - self.has_header = has_header - self.delimiter = delimiter - self.quote = quote - self.terminator = terminator - self.escape = escape - self.comment = comment - self.newlines_in_values = newlines_in_values - self.schema = schema - self.schema_infer_max_records = schema_infer_max_records - self.file_extension = file_extension - self.table_partition_cols = table_partition_cols or [] - self.file_compression_type = file_compression_type - self.file_sort_order = file_sort_order or [] - self.null_regex = null_regex - self.truncated_rows = truncated_rows - - def with_has_header(self, has_header: bool) -> CsvReadOptions: - """Configure whether the CSV has a header row.""" - self.has_header = has_header - return self - - def with_delimiter(self, delimiter: str) -> CsvReadOptions: - """Configure the column delimiter.""" - self.delimiter = delimiter - return self - - def with_quote(self, quote: str) -> CsvReadOptions: - """Configure the quote character.""" - self.quote = quote - return self - - def with_terminator(self, terminator: str | None) -> CsvReadOptions: - """Configure the line terminator character.""" - self.terminator = terminator - return self - - def with_escape(self, escape: str | None) -> CsvReadOptions: - """Configure the escape character.""" - self.escape = escape - return self - - def with_comment(self, comment: str | None) -> CsvReadOptions: - """Configure the comment character.""" - self.comment = comment - return self - - def with_newlines_in_values(self, newlines_in_values: bool) -> CsvReadOptions: - """Configure whether newlines in values are supported.""" - self.newlines_in_values = newlines_in_values - return self - - def with_schema(self, schema: pa.Schema | None) -> CsvReadOptions: - """Configure the 
schema.""" - self.schema = schema - return self - - def with_schema_infer_max_records( - self, schema_infer_max_records: int - ) -> CsvReadOptions: - """Configure maximum records for schema inference.""" - self.schema_infer_max_records = schema_infer_max_records - return self - - def with_file_extension(self, file_extension: str) -> CsvReadOptions: - """Configure the file extension filter.""" - self.file_extension = file_extension - return self - - def with_table_partition_cols( - self, table_partition_cols: list[tuple[str, pa.DataType]] - ) -> CsvReadOptions: - """Configure table partition columns.""" - self.table_partition_cols = table_partition_cols - return self - - def with_file_compression_type(self, file_compression_type: str) -> CsvReadOptions: - """Configure file compression type.""" - self.file_compression_type = file_compression_type - return self - - def with_file_sort_order( - self, file_sort_order: list[list[SortExpr]] - ) -> CsvReadOptions: - """Configure file sort order.""" - self.file_sort_order = file_sort_order - return self - - def with_null_regex(self, null_regex: str | None) -> CsvReadOptions: - """Configure null value regex pattern.""" - self.null_regex = null_regex - return self - - def with_truncated_rows(self, truncated_rows: bool) -> CsvReadOptions: - """Configure whether to allow truncated rows.""" - self.truncated_rows = truncated_rows - return self - - def to_inner(self) -> options.CsvReadOptions: - """Convert this object into the underlying Rust structure. - - This is intended for internal use only. 
- """ - file_sort_order = ( - [] - if self.file_sort_order is None - else [ - sort_list_to_raw_sort_list(sort_list) - for sort_list in self.file_sort_order - ] - ) - - return options.CsvReadOptions( - has_header=self.has_header, - delimiter=ord(self.delimiter[0]) if self.delimiter else ord(","), - quote=ord(self.quote[0]) if self.quote else ord('"'), - terminator=ord(self.terminator[0]) if self.terminator else None, - escape=ord(self.escape[0]) if self.escape else None, - comment=ord(self.comment[0]) if self.comment else None, - newlines_in_values=self.newlines_in_values, - schema=self.schema, - schema_infer_max_records=self.schema_infer_max_records, - file_extension=self.file_extension, - table_partition_cols=_convert_table_partition_cols( - self.table_partition_cols - ), - file_compression_type=self.file_compression_type or "", - file_sort_order=file_sort_order, - null_regex=self.null_regex, - truncated_rows=self.truncated_rows, - ) - - -def validate_single_character(name: str, value: str | None) -> None: - if value is not None and len(value) != 1: - message = f"{name} must be a single character" - raise ValueError(message) - - -def _convert_table_partition_cols( - table_partition_cols: list[tuple[str, str | pa.DataType]], -) -> list[tuple[str, pa.DataType]]: - warn = False - converted_table_partition_cols = [] - - for col, data_type in table_partition_cols: - if isinstance(data_type, str): - warn = True - if data_type == "string": - converted_data_type = pa.string() - elif data_type == "int": - converted_data_type = pa.int32() - else: - message = ( - f"Unsupported literal data type '{data_type}' for partition " - "column. 
Supported types are 'string' and 'int'" - ) - raise ValueError(message) - else: - converted_data_type = data_type - - converted_table_partition_cols.append((col, converted_data_type)) - - if warn: - message = ( - "using literals for table_partition_cols data types is deprecated," - "use pyarrow types instead" - ) - warnings.warn( - message, - category=DeprecationWarning, - stacklevel=2, - ) - - return converted_table_partition_cols diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py deleted file mode 100644 index fb54fd624..000000000 --- a/python/datafusion/plan.py +++ /dev/null @@ -1,153 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module supports physical and logical plans in DataFusion.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -import datafusion._internal as df_internal - -if TYPE_CHECKING: - from datafusion.context import SessionContext - -__all__ = [ - "ExecutionPlan", - "LogicalPlan", -] - - -class LogicalPlan: - """Logical Plan. - - A `LogicalPlan` is a node in a tree of relational operators (such as - Projection or Filter). - - Represents transforming an input relation (table) to an output relation - (table) with a potentially different schema. 
Plans form a dataflow tree - where data flows from leaves up to the root to produce the query result. - - A `LogicalPlan` can be created by the SQL query planner, the DataFrame API, - or programmatically (for example custom query languages). - """ - - def __init__(self, plan: df_internal.LogicalPlan) -> None: - """This constructor should not be called by the end user.""" - self._raw_plan = plan - - def to_variant(self) -> Any: - """Convert the logical plan into its specific variant.""" - return self._raw_plan.to_variant() - - def inputs(self) -> list[LogicalPlan]: - """Returns the list of inputs to the logical plan.""" - return [LogicalPlan(p) for p in self._raw_plan.inputs()] - - def __repr__(self) -> str: - """Generate a printable representation of the plan.""" - return self._raw_plan.__repr__() - - def display(self) -> str: - """Print the logical plan.""" - return self._raw_plan.display() - - def display_indent(self) -> str: - """Print an indented form of the logical plan.""" - return self._raw_plan.display_indent() - - def display_indent_schema(self) -> str: - """Print an indented form of the schema for the logical plan.""" - return self._raw_plan.display_indent_schema() - - def display_graphviz(self) -> str: - """Print the graph visualization of the logical plan. - - Returns a `format`able structure that produces lines meant for graphical display - using the `DOT` language. This format can be visualized using software from - [`graphviz`](https://graphviz.org/) - """ - return self._raw_plan.display_graphviz() - - @staticmethod - def from_proto(ctx: SessionContext, data: bytes) -> LogicalPlan: - """Create a LogicalPlan from protobuf bytes. - - Tables created in memory from record batches are currently not supported. - """ - return LogicalPlan(df_internal.LogicalPlan.from_proto(ctx.ctx, data)) - - def to_proto(self) -> bytes: - """Convert a LogicalPlan to protobuf bytes. - - Tables created in memory from record batches are currently not supported. 
- """ - return self._raw_plan.to_proto() - - def __eq__(self, other: LogicalPlan) -> bool: - """Test equality.""" - if not isinstance(other, LogicalPlan): - return False - return self._raw_plan.__eq__(other._raw_plan) - - -class ExecutionPlan: - """Represent nodes in the DataFusion Physical Plan.""" - - def __init__(self, plan: df_internal.ExecutionPlan) -> None: - """This constructor should not be called by the end user.""" - self._raw_plan = plan - - def children(self) -> list[ExecutionPlan]: - """Get a list of children `ExecutionPlan` that act as inputs to this plan. - - The returned list will be empty for leaf nodes such as scans, will contain a - single value for unary nodes, or two values for binary nodes (such as joins). - """ - return [ExecutionPlan(e) for e in self._raw_plan.children()] - - def display(self) -> str: - """Print the physical plan.""" - return self._raw_plan.display() - - def display_indent(self) -> str: - """Print an indented form of the physical plan.""" - return self._raw_plan.display_indent() - - def __repr__(self) -> str: - """Print a string representation of the physical plan.""" - return self._raw_plan.__repr__() - - @property - def partition_count(self) -> int: - """Returns the number of partitions in the physical plan.""" - return self._raw_plan.partition_count - - @staticmethod - def from_proto(ctx: SessionContext, data: bytes) -> ExecutionPlan: - """Create an ExecutionPlan from protobuf bytes. - - Tables created in memory from record batches are currently not supported. - """ - return ExecutionPlan(df_internal.ExecutionPlan.from_proto(ctx.ctx, data)) - - def to_proto(self) -> bytes: - """Convert an ExecutionPlan into protobuf bytes. - - Tables created in memory from record batches are currently not supported. 
- """ - return self._raw_plan.to_proto() diff --git a/python/datafusion/py.typed b/python/datafusion/py.typed deleted file mode 100644 index d216be4dd..000000000 --- a/python/datafusion/py.typed +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. \ No newline at end of file diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py deleted file mode 100644 index c24cde0ac..000000000 --- a/python/datafusion/record_batch.py +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module provides the classes for handling record batches. - -These are typically the result of dataframe -:py:func:`datafusion.dataframe.execute_stream` operations. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - import pyarrow as pa - import typing_extensions - - import datafusion._internal as df_internal - - -class RecordBatch: - """This class is essentially a wrapper for :py:class:`pa.RecordBatch`.""" - - def __init__(self, record_batch: df_internal.RecordBatch) -> None: - """This constructor is generally not called by the end user. - - See the :py:class:`RecordBatchStream` iterator for generating this class. - """ - self.record_batch = record_batch - - def to_pyarrow(self) -> pa.RecordBatch: - """Convert to :py:class:`pa.RecordBatch`.""" - return self.record_batch.to_pyarrow() - - def __arrow_c_array__( - self, requested_schema: object | None = None - ) -> tuple[object, object]: - """Export the record batch via the Arrow C Data Interface. - - This allows zero-copy interchange with libraries that support the - `Arrow PyCapsule interface `_. - - Args: - requested_schema: Attempt to provide the record batch using this - schema. Only straightforward projections such as column - selection or reordering are applied. - - Returns: - Two Arrow PyCapsule objects representing the ``ArrowArray`` and - ``ArrowSchema``. - """ - return self.record_batch.__arrow_c_array__(requested_schema) - - -class RecordBatchStream: - """This class represents a stream of record batches. - - These are typically the result of a - :py:func:`~datafusion.dataframe.DataFrame.execute_stream` operation. 
- """ - - def __init__(self, record_batch_stream: df_internal.RecordBatchStream) -> None: - """This constructor is typically not called by the end user.""" - self.rbs = record_batch_stream - - def next(self) -> RecordBatch: - """See :py:func:`__next__` for the iterator function.""" - return next(self) - - async def __anext__(self) -> RecordBatch: - """Return the next :py:class:`RecordBatch` in the stream asynchronously.""" - next_batch = await self.rbs.__anext__() - return RecordBatch(next_batch) - - def __next__(self) -> RecordBatch: - """Return the next :py:class:`RecordBatch` in the stream.""" - next_batch = next(self.rbs) - return RecordBatch(next_batch) - - def __aiter__(self) -> typing_extensions.Self: - """Return an asynchronous iterator over record batches.""" - return self - - def __iter__(self) -> typing_extensions.Self: - """Return an iterator over record batches.""" - return self diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py deleted file mode 100644 index 3115238fa..000000000 --- a/python/datafusion/substrait.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module provides support for using substrait with datafusion. 
- -For additional information about substrait, see https://substrait.io/ for more -information about substrait. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -try: - from warnings import deprecated # Python 3.13+ -except ImportError: - from typing_extensions import deprecated # Python 3.12 - -from datafusion.plan import LogicalPlan - -from ._internal import substrait as substrait_internal - -if TYPE_CHECKING: - import pathlib - - from datafusion.context import SessionContext - -__all__ = [ - "Consumer", - "Plan", - "Producer", - "Serde", -] - - -class Plan: - """A class representing an encodable substrait plan.""" - - def __init__(self, plan: substrait_internal.Plan) -> None: - """Create a substrait plan. - - The user should not have to call this constructor directly. Rather, it - should be created via :py:class:`Serde` or py:class:`Producer` classes - in this module. - """ - self.plan_internal = plan - - def encode(self) -> bytes: - """Encode the plan to bytes. - - Returns: - Encoded plan. - """ - return self.plan_internal.encode() - - def to_json(self) -> str: - """Get the JSON representation of the Substrait plan. - - Returns: - A JSON representation of the Substrait plan. - """ - return self.plan_internal.to_json() - - @staticmethod - def from_json(json: str) -> Plan: - """Parse a plan from a JSON string representation. - - Args: - json: JSON representation of a Substrait plan. - - Returns: - Plan object representing the Substrait plan. - """ - return Plan(substrait_internal.Plan.from_json(json)) - - -@deprecated("Use `Plan` instead.") -class plan(Plan): # noqa: N801 - """See `Plan`.""" - - -class Serde: - """Provides the ``Substrait`` serialization and deserialization.""" - - @staticmethod - def serialize(sql: str, ctx: SessionContext, path: str | pathlib.Path) -> None: - """Serialize a SQL query to a Substrait plan and write it to a file. - - Args: - sql:SQL query to serialize. - ctx: SessionContext to use. 
- path: Path to write the Substrait plan to. - """ - return substrait_internal.Serde.serialize(sql, ctx.ctx, str(path)) - - @staticmethod - def serialize_to_plan(sql: str, ctx: SessionContext) -> Plan: - """Serialize a SQL query to a Substrait plan. - - Args: - sql: SQL query to serialize. - ctx: SessionContext to use. - - Returns: - Substrait plan. - """ - return Plan(substrait_internal.Serde.serialize_to_plan(sql, ctx.ctx)) - - @staticmethod - def serialize_bytes(sql: str, ctx: SessionContext) -> bytes: - """Serialize a SQL query to a Substrait plan as bytes. - - Args: - sql: SQL query to serialize. - ctx: SessionContext to use. - - Returns: - Substrait plan as bytes. - """ - return substrait_internal.Serde.serialize_bytes(sql, ctx.ctx) - - @staticmethod - def deserialize(path: str | pathlib.Path) -> Plan: - """Deserialize a Substrait plan from a file. - - Args: - path: Path to read the Substrait plan from. - - Returns: - Substrait plan. - """ - return Plan(substrait_internal.Serde.deserialize(str(path))) - - @staticmethod - def deserialize_bytes(proto_bytes: bytes) -> Plan: - """Deserialize a Substrait plan from bytes. - - Args: - proto_bytes: Bytes to read the Substrait plan from. - - Returns: - Substrait plan. - """ - return Plan(substrait_internal.Serde.deserialize_bytes(proto_bytes)) - - -@deprecated("Use `Serde` instead.") -class serde(Serde): # noqa: N801 - """See `Serde` instead.""" - - -class Producer: - """Generates substrait plans from a logical plan.""" - - @staticmethod - def to_substrait_plan(logical_plan: LogicalPlan, ctx: SessionContext) -> Plan: - """Convert a DataFusion LogicalPlan to a Substrait plan. - - Args: - logical_plan: LogicalPlan to convert. - ctx: SessionContext to use. - - Returns: - Substrait plan. 
- """ - return Plan( - substrait_internal.Producer.to_substrait_plan( - logical_plan._raw_plan, ctx.ctx - ) - ) - - -@deprecated("Use `Producer` instead.") -class producer(Producer): # noqa: N801 - """Use `Producer` instead.""" - - -class Consumer: - """Generates a logical plan from a substrait plan.""" - - @staticmethod - def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: - """Convert a Substrait plan to a DataFusion LogicalPlan. - - Args: - ctx: SessionContext to use. - plan: Substrait plan to convert. - - Returns: - LogicalPlan. - """ - return LogicalPlan( - substrait_internal.Consumer.from_substrait_plan(ctx.ctx, plan.plan_internal) - ) - - -@deprecated("Use `Consumer` instead.") -class consumer(Consumer): # noqa: N801 - """Use `Consumer` instead.""" diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py deleted file mode 100644 index c7265fa09..000000000 --- a/python/datafusion/udf.py +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Deprecated module for user defined functions.""" - -import warnings - -from datafusion.user_defined import * # noqa: F403 - -warnings.warn( - "The module 'udf' is deprecated and will be removed in the next release. 
" - "Please use 'user_defined' instead.", - DeprecationWarning, - stacklevel=2, -) diff --git a/python/datafusion/unparser.py b/python/datafusion/unparser.py deleted file mode 100644 index 7ca5b9190..000000000 --- a/python/datafusion/unparser.py +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""This module provides support for unparsing datafusion plans to SQL. 
- -For additional information about unparsing, see https://docs.rs/datafusion-sql/latest/datafusion_sql/unparser/index.html -""" - -from ._internal import unparser as unparser_internal -from .plan import LogicalPlan - - -class Dialect: - """DataFusion data catalog.""" - - def __init__(self, dialect: unparser_internal.Dialect) -> None: - """This constructor is not typically called by the end user.""" - self.dialect = dialect - - @staticmethod - def default() -> "Dialect": - """Create a new default dialect.""" - return Dialect(unparser_internal.Dialect.default()) - - @staticmethod - def mysql() -> "Dialect": - """Create a new MySQL dialect.""" - return Dialect(unparser_internal.Dialect.mysql()) - - @staticmethod - def postgres() -> "Dialect": - """Create a new PostgreSQL dialect.""" - return Dialect(unparser_internal.Dialect.postgres()) - - @staticmethod - def sqlite() -> "Dialect": - """Create a new SQLite dialect.""" - return Dialect(unparser_internal.Dialect.sqlite()) - - @staticmethod - def duckdb() -> "Dialect": - """Create a new DuckDB dialect.""" - return Dialect(unparser_internal.Dialect.duckdb()) - - -class Unparser: - """DataFusion unparser.""" - - def __init__(self, dialect: Dialect) -> None: - """This constructor is not typically called by the end user.""" - self.unparser = unparser_internal.Unparser(dialect.dialect) - - def plan_to_sql(self, plan: LogicalPlan) -> str: - """Convert a logical plan to a SQL string.""" - return self.unparser.plan_to_sql(plan._raw_plan) - - def with_pretty(self, pretty: bool) -> "Unparser": - """Set the pretty flag.""" - self.unparser = self.unparser.with_pretty(pretty) - return self - - -__all__ = [ - "Dialect", - "Unparser", -] diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py deleted file mode 100644 index eef23e741..000000000 --- a/python/datafusion/user_defined.py +++ /dev/null @@ -1,1044 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license 
agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Provides the user-defined functions for evaluation of dataframes.""" - -from __future__ import annotations - -import functools -from abc import ABCMeta, abstractmethod -from enum import Enum -from typing import TYPE_CHECKING, Any, Protocol, TypeGuard, TypeVar, cast, overload - -import pyarrow as pa - -import datafusion._internal as df_internal -from datafusion import SessionContext -from datafusion.expr import Expr - -if TYPE_CHECKING: - from _typeshed import CapsuleType as _PyCapsule - - _R = TypeVar("_R", bound=pa.DataType) - from collections.abc import Callable, Sequence - - -class Volatility(Enum): - """Defines how stable or volatile a function is. - - When setting the volatility of a function, you can either pass this - enumeration or a ``str``. The ``str`` equivalent is the lower case value of the - name (`"immutable"`, `"stable"`, or `"volatile"`). - """ - - Immutable = 1 - """An immutable function will always return the same output when given the - same input. - - DataFusion will attempt to inline immutable functions during planning. - """ - - Stable = 2 - """ - Returns the same value for a given input within a single queries. 
- - A stable function may return different values given the same input across - different queries but must return the same value for a given input within a - query. An example of this is the ``Now`` function. DataFusion will attempt to - inline ``Stable`` functions during planning, when possible. For query - ``select col1, now() from t1``, it might take a while to execute but ``now()`` - column will be the same for each output row, which is evaluated during - planning. - """ - - Volatile = 3 - """A volatile function may change the return value from evaluation to - evaluation. - - Multiple invocations of a volatile function may return different results - when used in the same query. An example of this is the random() function. - DataFusion can not evaluate such functions during planning. In the query - ``select col1, random() from t1``, ``random()`` function will be evaluated - for each output row, resulting in a unique random value for each row. - """ - - def __str__(self) -> str: - """Returns the string equivalent.""" - return self.name.lower() - - -def data_type_or_field_to_field(value: pa.DataType | pa.Field, name: str) -> pa.Field: - """Helper function to return a Field from either a Field or DataType.""" - if isinstance(value, pa.Field): - return value - return pa.field(name, type=value) - - -def data_types_or_fields_to_field_list( - inputs: Sequence[pa.Field | pa.DataType] | pa.Field | pa.DataType, -) -> list[pa.Field]: - """Helper function to return a list of Fields.""" - if isinstance(inputs, pa.DataType): - return [pa.field("value", type=inputs)] - if isinstance(inputs, pa.Field): - return [inputs] - - return [ - data_type_or_field_to_field(v, f"value_{idx}") for (idx, v) in enumerate(inputs) - ] - - -class ScalarUDFExportable(Protocol): - """Type hint for object that has __datafusion_scalar_udf__ PyCapsule.""" - - def __datafusion_scalar_udf__(self) -> object: ... 
# noqa: D105 - - -def _is_pycapsule(value: object) -> TypeGuard[_PyCapsule]: - """Return ``True`` when ``value`` is a CPython ``PyCapsule``.""" - return value.__class__.__name__ == "PyCapsule" - - -class ScalarUDF: - """Class for performing scalar user-defined functions (UDF). - - Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for - operating on a group of rows. - """ - - def __init__( - self, - name: str, - func: Callable[..., _R], - input_fields: list[pa.Field], - return_field: _R, - volatility: Volatility | str, - ) -> None: - """Instantiate a scalar user-defined function (UDF). - - See helper method :py:func:`udf` for argument details. - """ - if hasattr(func, "__datafusion_scalar_udf__"): - self._udf = df_internal.ScalarUDF.from_pycapsule(func) - return - if isinstance(input_fields, pa.DataType): - input_fields = [input_fields] - self._udf = df_internal.ScalarUDF( - name, func, input_fields, return_field, str(volatility) - ) - - def __repr__(self) -> str: - """Print a string representation of the Scalar UDF.""" - return self._udf.__repr__() - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. - """ - args_raw = [arg.expr for arg in args] - return Expr(self._udf.__call__(*args_raw)) - - @overload - @staticmethod - def udf( - input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, - return_field: pa.DataType | pa.Field, - volatility: Volatility | str, - name: str | None = None, - ) -> Callable[..., ScalarUDF]: ... - - @overload - @staticmethod - def udf( - func: Callable[..., _R], - input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, - return_field: pa.DataType | pa.Field, - volatility: Volatility | str, - name: str | None = None, - ) -> ScalarUDF: ... - - @overload - @staticmethod - def udf(func: ScalarUDFExportable) -> ScalarUDF: ... 
- - @staticmethod - def udf(*args: Any, **kwargs: Any): # noqa: D417 - """Create a new User-Defined Function (UDF). - - This class can be used both as either a function or a decorator. - - Usage: - - As a function: ``udf(func, input_fields, return_field, volatility, name)``. - - As a decorator: ``@udf(input_fields, return_field, volatility, name)``. - When used a decorator, do **not** pass ``func`` explicitly. - - In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. - When you do so, it will be assumed that the nullability of the inputs and - output are True and that they have no metadata. - - Args: - func (Callable, optional): Only needed when calling as a function. - Skip this argument when using `udf` as a decorator. If you have a Rust - backed ScalarUDF within a PyCapsule, you can pass this parameter - and ignore the rest. They will be determined directly from the - underlying function. See the online documentation for more information. - input_fields (list[pa.Field | pa.DataType]): The data types or Fields - of the arguments to ``func``. This list must be of the same length - as the number of arguments. - return_field (_R): The field of the return value from the function. - volatility (Volatility | str): See `Volatility` for allowed values. - name (Optional[str]): A descriptive name for the function. - - Returns: - A user-defined function that can be used in SQL expressions, - data aggregation, or window function calls. 
- - Example: Using ``udf`` as a function:: - - def double_func(x): - return x * 2 - double_udf = udf(double_func, [pa.int32()], pa.int32(), - "volatile", "double_it") - - Example: Using ``udf`` as a decorator:: - - @udf([pa.int32()], pa.int32(), "volatile", "double_it") - def double_udf(x): - return x * 2 - """ # noqa: W505 E501 - - def _function( - func: Callable[..., _R], - input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, - return_field: pa.DataType | pa.Field, - volatility: Volatility | str, - name: str | None = None, - ) -> ScalarUDF: - if not callable(func): - msg = "`func` argument must be callable" - raise TypeError(msg) - if name is None: - if hasattr(func, "__qualname__"): - name = func.__qualname__.lower() - else: - name = func.__class__.__name__.lower() - input_fields = data_types_or_fields_to_field_list(input_fields) - return_field = data_type_or_field_to_field(return_field, "value") - return ScalarUDF( - name=name, - func=func, - input_fields=input_fields, - return_field=return_field, - volatility=volatility, - ) - - def _decorator( - input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, - return_field: _R, - volatility: Volatility | str, - name: str | None = None, - ) -> Callable: - def decorator(func: Callable) -> Callable: - udf_caller = ScalarUDF.udf( - func, input_fields, return_field, volatility, name - ) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Callable: - return udf_caller(*args, **kwargs) - - return wrapper - - return decorator - - if hasattr(args[0], "__datafusion_scalar_udf__"): - return ScalarUDF.from_pycapsule(args[0]) - - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return _function(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return _decorator(*args, **kwargs) - - @staticmethod - def from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF: - """Create a Scalar UDF from ScalarUDF 
PyCapsule object. - - This function will instantiate a Scalar UDF that uses a DataFusion - ScalarUDF that is exported via the FFI bindings. - """ - name = str(func.__class__) - return ScalarUDF( - name=name, - func=func, - input_fields=None, - return_field=None, - volatility=None, - ) - - -class Accumulator(metaclass=ABCMeta): - """Defines how an :py:class:`AggregateUDF` accumulates values.""" - - @abstractmethod - def state(self) -> list[pa.Scalar]: - """Return the current state. - - While this function template expects PyArrow Scalar values return type, - you can return any value that can be converted into a Scalar. This - includes basic Python data types such as integers and strings. In - addition to primitive types, we currently support PyArrow, nanoarrow, - and arro3 objects in addition to primitive data types. Other objects - that support the Arrow FFI standard will be given a "best attempt" at - conversion to scalar objects. - """ - - @abstractmethod - def update(self, *values: pa.Array) -> None: - """Evaluate an array of values and update state.""" - - @abstractmethod - def merge(self, states: list[pa.Array]) -> None: - """Merge a set of states.""" - - @abstractmethod - def evaluate(self) -> pa.Scalar: - """Return the resultant value. - - While this function template expects a PyArrow Scalar value return type, - you can return any value that can be converted into a Scalar. This - includes basic Python data types such as integers and strings. In - addition to primitive types, we currently support PyArrow, nanoarrow, - and arro3 objects in addition to primitive data types. Other objects - that support the Arrow FFI standard will be given a "best attempt" at - conversion to scalar objects. - """ - - -class AggregateUDFExportable(Protocol): - """Type hint for object that has __datafusion_aggregate_udf__ PyCapsule.""" - - def __datafusion_aggregate_udf__(self) -> object: ... 
# noqa: D105 - - -class AggregateUDF: - """Class for performing scalar user-defined functions (UDF). - - Aggregate UDFs operate on a group of rows and return a single value. See - also :py:class:`ScalarUDF` for operating on a row by row basis. - """ - - @overload - def __init__( - self, - name: str, - accumulator: Callable[[], Accumulator], - input_types: list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - ) -> None: ... - - @overload - def __init__( - self, - name: str, - accumulator: AggregateUDFExportable, - input_types: None = ..., - return_type: None = ..., - state_type: None = ..., - volatility: None = ..., - ) -> None: ... - - def __init__( - self, - name: str, - accumulator: Callable[[], Accumulator] | AggregateUDFExportable, - input_types: list[pa.DataType] | None, - return_type: pa.DataType | None, - state_type: list[pa.DataType] | None, - volatility: Volatility | str | None, - ) -> None: - """Instantiate a user-defined aggregate function (UDAF). - - See :py:func:`udaf` for a convenience function and argument - descriptions. - """ - if hasattr(accumulator, "__datafusion_aggregate_udf__"): - self._udaf = df_internal.AggregateUDF.from_pycapsule(accumulator) - return - if ( - input_types is None - or return_type is None - or state_type is None - or volatility is None - ): - msg = ( - "`input_types`, `return_type`, `state_type`, and `volatility` " - "must be provided when `accumulator` is callable." - ) - raise TypeError(msg) - - self._udaf = df_internal.AggregateUDF( - name, - accumulator, - input_types, - return_type, - state_type, - str(volatility), - ) - - def __repr__(self) -> str: - """Print a string representation of the Aggregate UDF.""" - return self._udaf.__repr__() - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDAF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. 
- """ - args_raw = [arg.expr for arg in args] - return Expr(self._udaf.__call__(*args_raw)) - - @overload - @staticmethod - def udaf( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: str | None = None, - ) -> Callable[..., AggregateUDF]: ... - - @overload - @staticmethod - def udaf( - accum: Callable[[], Accumulator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: str | None = None, - ) -> AggregateUDF: ... - - @overload - @staticmethod - def udaf(accum: AggregateUDFExportable) -> AggregateUDF: ... - - @overload - @staticmethod - def udaf(accum: _PyCapsule) -> AggregateUDF: ... - - @staticmethod - def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901 - """Create a new User-Defined Aggregate Function (UDAF). - - This class allows you to define an aggregate function that can be used in - data aggregation or window function calls. - - Usage: - - As a function: ``udaf(accum, input_types, return_type, state_type, volatility, name)``. - - As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``. - When using ``udaf`` as a decorator, do not pass ``accum`` explicitly. - - Function example: - - If your :py:class:`Accumulator` can be instantiated with no arguments, you - can simply pass it's type as `accum`. If you need to pass additional - arguments to it's constructor, you can define a lambda or a factory method. - During runtime the :py:class:`Accumulator` will be constructed for every - instance in which this UDAF is used. 
The following examples are all valid:: - - import pyarrow as pa - import pyarrow.compute as pc - - class Summarize(Accumulator): - def __init__(self, bias: float = 0.0): - self._sum = pa.scalar(bias) - - def state(self) -> list[pa.Scalar]: - return [self._sum] - - def update(self, values: pa.Array) -> None: - self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - - def merge(self, states: list[pa.Array]) -> None: - self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) - - def evaluate(self) -> pa.Scalar: - return self._sum - - def sum_bias_10() -> Summarize: - return Summarize(10.0) - - udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], - "immutable") - udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], - "immutable") - udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), - [pa.float64()], "immutable") - - Decorator example::: - - @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") - def udf4() -> Summarize: - return Summarize(10.0) - - Args: - accum: The accumulator python function. Only needed when calling as a - function. Skip this argument when using ``udaf`` as a decorator. - If you have a Rust backed AggregateUDF within a PyCapsule, you can - pass this parameter and ignore the rest. They will be determined - directly from the underlying function. See the online documentation - for more information. - input_types: The data types of the arguments to ``accum``. - return_type: The data type of the return value. - state_type: The data types of the intermediate accumulation. - volatility: See :py:class:`Volatility` for allowed values. - name: A descriptive name for the function. - - Returns: - A user-defined aggregate function, which can be used in either data - aggregation or window function calls. 
- """ # noqa: E501 W505 - - def _function( - accum: Callable[[], Accumulator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: str | None = None, - ) -> AggregateUDF: - if not callable(accum): - msg = "`func` must be callable." - raise TypeError(msg) - if not isinstance(accum(), Accumulator): - msg = "Accumulator must implement the abstract base class Accumulator" - raise TypeError(msg) - if name is None: - name = accum().__class__.__qualname__.lower() - if isinstance(input_types, pa.DataType): - input_types = [input_types] - return AggregateUDF( - name=name, - accumulator=accum, - input_types=input_types, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) - - def _decorator( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - state_type: list[pa.DataType], - volatility: Volatility | str, - name: str | None = None, - ) -> Callable[..., Callable[..., Expr]]: - def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: - udaf_caller = AggregateUDF.udaf( - accum, input_types, return_type, state_type, volatility, name - ) - - @functools.wraps(accum) - def wrapper(*args: Any, **kwargs: Any) -> Expr: - return udaf_caller(*args, **kwargs) - - return wrapper - - return decorator - - if hasattr(args[0], "__datafusion_aggregate_udf__") or _is_pycapsule(args[0]): - return AggregateUDF.from_pycapsule(args[0]) - - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return _function(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return _decorator(*args, **kwargs) - - @staticmethod - def from_pycapsule(func: AggregateUDFExportable | _PyCapsule) -> AggregateUDF: - """Create an Aggregate UDF from AggregateUDF PyCapsule object. 
- - This function will instantiate a Aggregate UDF that uses a DataFusion - AggregateUDF that is exported via the FFI bindings. - """ - if _is_pycapsule(func): - aggregate = cast("AggregateUDF", object.__new__(AggregateUDF)) - aggregate._udaf = df_internal.AggregateUDF.from_pycapsule(func) - return aggregate - - capsule = cast("AggregateUDFExportable", func) - name = str(capsule.__class__) - return AggregateUDF( - name=name, - accumulator=capsule, - input_types=None, - return_type=None, - state_type=None, - volatility=None, - ) - - -class WindowEvaluator: - """Evaluator class for user-defined window functions (UDWF). - - It is up to the user to decide which evaluate function is appropriate. - - +------------------------+--------------------------------+------------------+---------------------------+ - | ``uses_window_frame`` | ``supports_bounded_execution`` | ``include_rank`` | function_to_implement | - +========================+================================+==================+===========================+ - | False (default) | False (default) | False (default) | ``evaluate_all`` | - +------------------------+--------------------------------+------------------+---------------------------+ - | False | True | False | ``evaluate`` | - +------------------------+--------------------------------+------------------+---------------------------+ - | False | True/False | True | ``evaluate_all_with_rank``| - +------------------------+--------------------------------+------------------+---------------------------+ - | True | True/False | True/False | ``evaluate`` | - +------------------------+--------------------------------+------------------+---------------------------+ - """ # noqa: W505, E501 - - def memoize(self) -> None: - """Perform a memoize operation to improve performance. 
- - When the window frame has a fixed beginning (e.g UNBOUNDED - PRECEDING), some functions such as FIRST_VALUE and - NTH_VALUE do not need the (unbounded) input once they have - seen a certain amount of input. - - `memoize` is called after each input batch is processed, and - such functions can save whatever they need - """ - - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 - """Return the range for the window function. - - If `uses_window_frame` flag is `false`. This method is used to - calculate required range for the window function during - stateful execution. - - Generally there is no required range, hence by default this - returns smallest range(current row). e.g seeing current row is - enough to calculate window result (such as row_number, rank, - etc) - - Args: - idx:: Current index - num_rows: Number of rows. - """ - return (idx, idx + 1) - - def is_causal(self) -> bool: - """Get whether evaluator needs future data for its result.""" - return False - - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: - """Evaluate a window function on an entire input partition. - - This function is called once per input *partition* for window functions that - *do not use* values from the window frame, such as - :py:func:`~datafusion.functions.row_number`, - :py:func:`~datafusion.functions.rank`, - :py:func:`~datafusion.functions.dense_rank`, - :py:func:`~datafusion.functions.percent_rank`, - :py:func:`~datafusion.functions.cume_dist`, - :py:func:`~datafusion.functions.lead`, - and :py:func:`~datafusion.functions.lag`. - - It produces the result of all rows in a single pass. It - expects to receive the entire partition as the ``value`` and - must produce an output column with one output row for every - input row. - - ``num_rows`` is required to correctly compute the output in case - ``len(values) == 0`` - - Implementing this function is an optimization. 
Certain window - functions are not affected by the window frame definition or - the query doesn't have a frame, and ``evaluate`` skips the - (costly) window frame boundary calculation and the overhead of - calling ``evaluate`` for each output row. - - For example, the `LAG` built in window function does not use - the values of its window frame (it can be computed in one shot - on the entire partition with ``Self::evaluate_all`` regardless of the - window defined in the ``OVER`` clause) - - .. code-block:: text - - lag(x, 1) OVER (ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - - However, ``avg()`` computes the average in the window and thus - does use its window frame. - - .. code-block:: text - - avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - """ # noqa: W505, E501 - - def evaluate( - self, values: list[pa.Array], eval_range: tuple[int, int] - ) -> pa.Scalar: - """Evaluate window function on a range of rows in an input partition. - - This is the simplest and most general function to implement - but also the least performant as it creates output one row at - a time. It is typically much faster to implement stateful - evaluation using one of the other specialized methods on this - trait. - - Returns a [`ScalarValue`] that is the value of the window - function within `range` for the entire partition. Argument - `values` contains the evaluation result of function arguments - and evaluation results of ORDER BY expressions. If function has a - single argument, `values[1..]` will contain ORDER BY expression results. - """ - - def evaluate_all_with_rank( - self, num_rows: int, ranks_in_partition: list[tuple[int, int]] - ) -> pa.Array: - """Called for window functions that only need the rank of a row. - - Evaluate the partition evaluator against the partition using - the row ranks. For example, ``rank(col("a"))`` produces - - .. 
code-block:: text - - a | rank - - + ---- - A | 1 - A | 1 - C | 3 - D | 4 - D | 4 - - For this case, `num_rows` would be `5` and the - `ranks_in_partition` would be called with - - .. code-block:: text - - [ - (0,1), - (2,2), - (3,4), - ] - - The user must implement this method if ``include_rank`` returns True. - """ - - def supports_bounded_execution(self) -> bool: - """Can the window function be incrementally computed using bounded memory?""" - return False - - def uses_window_frame(self) -> bool: - """Does the window function use the values from the window frame?""" - return False - - def include_rank(self) -> bool: - """Can this function be evaluated with (only) rank?""" - return False - - -class WindowUDFExportable(Protocol): - """Type hint for object that has __datafusion_window_udf__ PyCapsule.""" - - def __datafusion_window_udf__(self) -> object: ... # noqa: D105 - - -class WindowUDF: - """Class for performing window user-defined functions (UDF). - - Window UDFs operate on a partition of rows. See - also :py:class:`ScalarUDF` for operating on a row by row basis. - """ - - def __init__( - self, - name: str, - func: Callable[[], WindowEvaluator], - input_types: list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - ) -> None: - """Instantiate a user-defined window function (UDWF). - - See :py:func:`udwf` for a convenience function and argument - descriptions. - """ - if hasattr(func, "__datafusion_window_udf__"): - self._udwf = df_internal.WindowUDF.from_pycapsule(func) - return - self._udwf = df_internal.WindowUDF( - name, func, input_types, return_type, str(volatility) - ) - - def __repr__(self) -> str: - """Print a string representation of the Window UDF.""" - return self._udwf.__repr__() - - def __call__(self, *args: Expr) -> Expr: - """Execute the UDWF. - - This function is not typically called by an end user. These calls will - occur during the evaluation of the dataframe. 
- """ - args_raw = [arg.expr for arg in args] - return Expr(self._udwf.__call__(*args_raw)) - - @overload - @staticmethod - def udwf( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: str | None = None, - ) -> Callable[..., WindowUDF]: ... - - @overload - @staticmethod - def udwf( - func: Callable[[], WindowEvaluator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: str | None = None, - ) -> WindowUDF: ... - - @staticmethod - def udwf(*args: Any, **kwargs: Any): # noqa: D417 - """Create a new User-Defined Window Function (UDWF). - - This class can be used both as either a function or a decorator. - - Usage: - - As a function: ``udwf(func, input_types, return_type, volatility, name)``. - - As a decorator: ``@udwf(input_types, return_type, volatility, name)``. - When using ``udwf`` as a decorator, do not pass ``func`` explicitly. - - Function example:: - - import pyarrow as pa - - class BiasedNumbers(WindowEvaluator): - def __init__(self, start: int = 0) -> None: - self.start = start - - def evaluate_all(self, values: list[pa.Array], - num_rows: int) -> pa.Array: - return pa.array([self.start + i for i in range(num_rows)]) - - def bias_10() -> BiasedNumbers: - return BiasedNumbers(10) - - udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable") - udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") - udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") - - - Decorator example:: - - @udwf(pa.int64(), pa.int64(), "immutable") - def biased_numbers() -> BiasedNumbers: - return BiasedNumbers(10) - - Args: - func: Only needed when calling as a function. Skip this argument when - using ``udwf`` as a decorator. If you have a Rust backed WindowUDF - within a PyCapsule, you can pass this parameter and ignore the rest. - They will be determined directly from the underlying function. 
See - the online documentation for more information. - input_types: The data types of the arguments. - return_type: The data type of the return value. - volatility: See :py:class:`Volatility` for allowed values. - name: A descriptive name for the function. - - Returns: - A user-defined window function that can be used in window function calls. - """ - if hasattr(args[0], "__datafusion_window_udf__"): - return WindowUDF.from_pycapsule(args[0]) - - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return WindowUDF._create_window_udf(*args, **kwargs) - # Case 2: Used as a decorator with parameters - return WindowUDF._create_window_udf_decorator(*args, **kwargs) - - @staticmethod - def _create_window_udf( - func: Callable[[], WindowEvaluator], - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: str | None = None, - ) -> WindowUDF: - """Create a WindowUDF instance from function arguments.""" - if not callable(func): - msg = "`func` must be callable." 
- raise TypeError(msg) - if not isinstance(func(), WindowEvaluator): - msg = "`func` must implement the abstract base class WindowEvaluator" - raise TypeError(msg) - - name = name or func.__qualname__.lower() - input_types = ( - [input_types] if isinstance(input_types, pa.DataType) else input_types - ) - - return WindowUDF(name, func, input_types, return_type, volatility) - - @staticmethod - def _get_default_name(func: Callable) -> str: - """Get the default name for a function based on its attributes.""" - if hasattr(func, "__qualname__"): - return func.__qualname__.lower() - return func.__class__.__name__.lower() - - @staticmethod - def _normalize_input_types( - input_types: pa.DataType | list[pa.DataType], - ) -> list[pa.DataType]: - """Convert a single DataType to a list if needed.""" - if isinstance(input_types, pa.DataType): - return [input_types] - return input_types - - @staticmethod - def _create_window_udf_decorator( - input_types: pa.DataType | list[pa.DataType], - return_type: pa.DataType, - volatility: Volatility | str, - name: str | None = None, - ) -> Callable[[Callable[[], WindowEvaluator]], Callable[..., Expr]]: - """Create a decorator for a WindowUDF.""" - - def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: - udwf_caller = WindowUDF._create_window_udf( - func, input_types, return_type, volatility, name - ) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Expr: - return udwf_caller(*args, **kwargs) - - return wrapper - - return decorator - - @staticmethod - def from_pycapsule(func: WindowUDFExportable) -> WindowUDF: - """Create a Window UDF from WindowUDF PyCapsule object. - - This function will instantiate a Window UDF that uses a DataFusion - WindowUDF that is exported via the FFI bindings. 
- """ - name = str(func.__class__) - return WindowUDF( - name=name, - func=func, - input_types=None, - return_type=None, - volatility=None, - ) - - -class TableFunction: - """Class for performing user-defined table functions (UDTF). - - Table functions generate new table providers based on the - input expressions. - """ - - def __init__( - self, name: str, func: Callable[[], any], ctx: SessionContext | None = None - ) -> None: - """Instantiate a user-defined table function (UDTF). - - See :py:func:`udtf` for a convenience function and argument - descriptions. - """ - self._udtf = df_internal.TableFunction(name, func, ctx) - - def __call__(self, *args: Expr) -> Any: - """Execute the UDTF and return a table provider.""" - args_raw = [arg.expr for arg in args] - return self._udtf.__call__(*args_raw) - - @overload - @staticmethod - def udtf( - name: str, - ) -> Callable[..., Any]: ... - - @overload - @staticmethod - def udtf( - func: Callable[[], Any], - name: str, - ) -> TableFunction: ... - - @staticmethod - def udtf(*args: Any, **kwargs: Any): - """Create a new User-Defined Table Function (UDTF).""" - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return TableFunction._create_table_udf(*args, **kwargs) - if args and hasattr(args[0], "__datafusion_table_function__"): - # Case 2: We have a datafusion FFI provided function - return TableFunction(args[1], args[0]) - # Case 3: Used as a decorator with parameters - return TableFunction._create_table_udf_decorator(*args, **kwargs) - - @staticmethod - def _create_table_udf( - func: Callable[..., Any], - name: str, - ) -> TableFunction: - """Create a TableFunction instance from function arguments.""" - if not callable(func): - msg = "`func` must be callable." 
- raise TypeError(msg) - - return TableFunction(name, func) - - @staticmethod - def _create_table_udf_decorator( - name: str | None = None, - ) -> Callable[[Callable[[], WindowEvaluator]], Callable[..., Expr]]: - """Create a decorator for a WindowUDF.""" - - def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: - return TableFunction._create_table_udf(func, name) - - return decorator - - def __repr__(self) -> str: - """User printable representation.""" - return self._udtf.__repr__() - - -# Convenience exports so we can import instead of treating as -# variables at the package root -udf = ScalarUDF.udf -udaf = AggregateUDF.udaf -udwf = WindowUDF.udwf -udtf = TableFunction.udtf diff --git a/python/tests/__init__.py b/python/tests/__init__.py deleted file mode 100644 index 13a83393a..000000000 --- a/python/tests/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/tests/conftest.py b/python/tests/conftest.py deleted file mode 100644 index 26ed7281d..000000000 --- a/python/tests/conftest.py +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -import pytest -from datafusion import DataFrame, SessionContext -from pyarrow.csv import write_csv - - -@pytest.fixture -def ctx(): - return SessionContext() - - -@pytest.fixture -def database(ctx, tmp_path): - path = tmp_path / "test.csv" - - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - write_csv(table, path) - - ctx.register_csv("csv", path) - ctx.register_csv("csv1", str(path)) - ctx.register_csv( - "csv2", - path, - has_header=True, - delimiter=",", - schema_infer_max_records=10, - ) - - -@pytest.fixture -def fail_collect(monkeypatch): - def _fail_collect(self, *args, **kwargs): # pragma: no cover - failure path - msg = "collect should not be called" - raise AssertionError(msg) - - monkeypatch.setattr(DataFrame, "collect", _fail_collect) diff --git a/python/tests/data_test_context/data.json b/python/tests/data_test_context/data.json deleted file mode 100644 index ff895b61f..000000000 --- a/python/tests/data_test_context/data.json +++ /dev/null @@ -1,3 +0,0 @@ -{"A": "a", "B": 1} -{"A": "b", "B": 2} -{"A": "c", "B": 3} diff --git a/python/tests/generic.py b/python/tests/generic.py deleted file mode 100644 index 1b98fdf9e..000000000 --- a/python/tests/generic.py +++ 
/dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datetime -from datetime import timezone - -import numpy as np -import pyarrow as pa -import pyarrow.csv - -# used to write parquet files -import pyarrow.parquet as pq - - -def data(): - rng = np.random.default_rng(1) - data = np.concatenate( - [ - rng.normal(0, 0.01, size=50), - rng.normal(50, 0.01, size=50), - ] - ) - return pa.array(data) - - -def data_with_nans(): - rng = np.random.default_rng(0) - data = rng.normal(0, 0.01, size=50) - mask = rng.normal(0, 2, size=50) - data[mask == 0] = np.nan - return data - - -def data_datetime(f): - data = [ - datetime.datetime.now(tz=timezone.utc), - datetime.datetime.now(tz=timezone.utc) - datetime.timedelta(days=1), - datetime.datetime.now(tz=timezone.utc) + datetime.timedelta(days=1), - ] - return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False])) - - -def data_date32(): - data = [ - datetime.date(2000, 1, 1), - datetime.date(1980, 1, 1), - datetime.date(2030, 1, 1), - ] - return pa.array(data, type=pa.date32(), mask=np.array([False, True, False])) - - -def data_timedelta(f): - data = [ - datetime.timedelta(days=100), - datetime.timedelta(days=1), - datetime.timedelta(seconds=1), - 
] - return pa.array(data, type=pa.duration(f), mask=np.array([False, True, False])) - - -def data_binary_other(): - return np.array([1, 0, 0], dtype="u4") - - -def write_parquet(path, data): - table = pa.Table.from_arrays([data], names=["a"]) - pq.write_table(table, path) - return str(path) diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py deleted file mode 100644 index 240332848..000000000 --- a/python/tests/test_aggregation.py +++ /dev/null @@ -1,480 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import numpy as np -import pyarrow as pa -import pytest -from datafusion import SessionContext, column, lit -from datafusion import functions as f -from datafusion.common import NullTreatment - - -@pytest.fixture -def df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [ - pa.array([1, 2, 3]), - pa.array([4, 4, 6]), - pa.array([9, 8, 5]), - pa.array([True, True, False]), - pa.array([1, 2, None]), - ], - names=["a", "b", "c", "d", "e"], - ) - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def df_partitioned(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [ - pa.array([0, 1, 2, 3, 4, 5, 6]), - pa.array([7, None, 7, 8, 9, None, 9]), - pa.array(["A", "A", "A", "A", "B", "B", "B"]), - ], - names=["a", "b", "c"], - ) - - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def df_aggregate_100(): - ctx = SessionContext() - ctx.register_csv("aggregate_test_data", "./testing/data/csv/aggregate_test_100.csv") - return ctx.table("aggregate_test_data") - - -@pytest.mark.parametrize( - ("agg_expr", "calc_expected"), - [ - (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))), - ( - f.corr(column("a"), column("b")), - lambda a, b, c, d: np.array(np.corrcoef(a, b)[0][1]), - ), - (f.count(column("a")), lambda a, b, c, d: pa.array([len(a)])), - # Sample (co)variance -> ddof=1 - # Population (co)variance -> ddof=0 - ( - f.covar(column("a"), column("b")), - lambda a, b, c, d: np.array(np.cov(a, b, ddof=1)[0][1]), - ), - ( - f.covar_pop(column("a"), column("c")), - lambda a, b, c, d: np.array(np.cov(a, c, ddof=0)[0][1]), - ), - ( - f.covar_samp(column("b"), column("c")), - lambda a, b, c, d: np.array(np.cov(b, c, ddof=1)[0][1]), - ), - # f.grouping(col_a), # noqa: ERA001 No physical plan implemented yet - (f.max(column("a")), lambda a, b, c, d: np.array(np.max(a))), - (f.mean(column("b")), lambda a, b, 
c, d: np.array(np.mean(b))), - (f.median(column("b")), lambda a, b, c, d: np.array(np.median(b))), - (f.min(column("a")), lambda a, b, c, d: np.array(np.min(a))), - (f.sum(column("b")), lambda a, b, c, d: np.array(np.sum(b.to_pylist()))), - # Sample stdev -> ddof=1 - # Population stdev -> ddof=0 - (f.stddev(column("a")), lambda a, b, c, d: np.array(np.std(a, ddof=1))), - (f.stddev_pop(column("b")), lambda a, b, c, d: np.array(np.std(b, ddof=0))), - (f.stddev_samp(column("c")), lambda a, b, c, d: np.array(np.std(c, ddof=1))), - (f.var(column("a")), lambda a, b, c, d: np.array(np.var(a, ddof=1))), - (f.var_pop(column("b")), lambda a, b, c, d: np.array(np.var(b, ddof=0))), - (f.var_samp(column("c")), lambda a, b, c, d: np.array(np.var(c, ddof=1))), - ], -) -def test_aggregation_stats(df, agg_expr, calc_expected): - df = df.select("a", "b", "c", "d") - agg_df = df.aggregate([], [agg_expr]) - result = agg_df.collect()[0] - values_a, values_b, values_c, values_d = df.collect()[0] - expected = calc_expected(values_a, values_b, values_c, values_d) - np.testing.assert_array_almost_equal(result.column(0), expected) - - -@pytest.mark.parametrize( - ("agg_expr", "expected", "array_sort"), - [ - (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64()), False), - ( - f.approx_distinct( - column("b"), - filter=column("a") != lit(3), - ), - pa.array([1], type=pa.uint64()), - False, - ), - (f.approx_median(column("b")), pa.array([4]), False), - (f.median(column("b"), distinct=True), pa.array([5]), False), - (f.median(column("b"), filter=column("a") != 2), pa.array([5]), False), - (f.approx_median(column("b"), filter=column("a") != 2), pa.array([5]), False), - (f.approx_percentile_cont(column("b"), 0.5), pa.array([4]), False), - ( - f.approx_percentile_cont( - column("b").sort(ascending=True, nulls_first=False), - 0.5, - num_centroids=2, - ), - pa.array([4]), - False, - ), - ( - f.approx_percentile_cont_with_weight(column("b"), lit(0.6), 0.5), - pa.array([4], 
type=pa.float64()), - False, - ), - ( - f.approx_percentile_cont_with_weight( - column("b").sort(ascending=False, nulls_first=False), lit(0.6), 0.5 - ), - pa.array([4], type=pa.float64()), - False, - ), - ( - f.approx_percentile_cont_with_weight( - column("b"), lit(0.6), 0.5, filter=column("a") != lit(3) - ), - pa.array([4], type=pa.float64()), - False, - ), - (f.array_agg(column("b")), pa.array([[4, 4, 6]]), False), - (f.array_agg(column("b"), distinct=True), pa.array([[4, 6]]), True), - ( - f.array_agg(column("e"), filter=column("e").is_not_null()), - pa.array([[1, 2]]), - False, - ), - ( - f.array_agg(column("b"), order_by=[column("c")]), - pa.array([[6, 4, 4]]), - False, - ), - ( - f.array_agg(column("b"), order_by=column("c")), - pa.array([[6, 4, 4]]), - False, - ), - (f.avg(column("b"), filter=column("a") != lit(1)), pa.array([5.0]), False), - (f.sum(column("b"), filter=column("a") != lit(1)), pa.array([10]), False), - (f.count(column("b"), distinct=True), pa.array([2]), False), - (f.count(column("b"), filter=column("a") != 3), pa.array([2]), False), - (f.count(), pa.array([3]), False), - (f.count(column("e")), pa.array([2]), False), - (f.count_star(filter=column("a") != 3), pa.array([2]), False), - (f.max(column("a"), filter=column("a") != lit(3)), pa.array([2]), False), - (f.min(column("a"), filter=column("a") != lit(1)), pa.array([2]), False), - ( - f.stddev(column("a"), filter=column("a") != lit(2)), - pa.array([np.sqrt(2)]), - False, - ), - ( - f.stddev_pop(column("a"), filter=column("a") != lit(2)), - pa.array([1.0]), - False, - ), - ], -) -def test_aggregation(df, agg_expr, expected, array_sort): - agg_df = df.aggregate([], [agg_expr.alias("agg_expr")]) - if array_sort: - agg_df = agg_df.select(f.array_sort(column("agg_expr"))) - agg_df.show() - result = agg_df.collect()[0] - - assert result.column(0) == expected - - -@pytest.mark.parametrize( - ("name", "expr", "expected"), - [ - ( - "approx_percentile_cont", - f.approx_percentile_cont(column("c3"), 
0.95, num_centroids=200), - [73, 68, 122, 124, 115], - ), - ( - "approx_perc_cont_few_centroids", - f.approx_percentile_cont(column("c3"), 0.95, num_centroids=5), - [72, 68, 119, 124, 115], - ), - ( - "approx_perc_cont_filtered", - f.approx_percentile_cont( - column("c3"), 0.95, num_centroids=200, filter=column("c3") > lit(0) - ), - [83, 68, 122, 124, 117], - ), - ( - "corr", - f.corr(column("c3"), column("c2")), - [-0.1056, -0.2808, 0.0023, 0.0022, -0.2473], - ), - ( - "corr_w_filter", - f.corr(column("c3"), column("c2"), filter=column("c3") > lit(0)), - [-0.3298, 0.2925, 0.2467, -0.2269, 0.0358], - ), - ( - "covar_pop", - f.covar_pop(column("c3"), column("c2")), - [-7.2857, -25.6731, 0.2222, 0.2469, -20.2857], - ), - ( - "covar_pop_w_filter", - f.covar_pop(column("c3"), column("c2"), filter=column("c3") > lit(0)), - [-9.25, 9.0579, 13.7521, -9.9669, 1.1641], - ), - ( - "covar_samp", - f.covar_samp(column("c3"), column("c2")), - [-7.65, -27.0994, 0.2333, 0.2614, -21.3], - ), - ( - "covar_samp_w_filter", - f.covar_samp(column("c3"), column("c2"), filter=column("c3") > lit(0)), - [-10.5714, 9.9636, 15.1273, -10.9636, 1.2417], - ), - ( - "var_samp", - f.var_samp(column("c2")), - [1.9286, 2.2047, 1.6333, 2.1438, 1.6], - ), - ( - "var_samp_w_filter", - f.var_samp(column("c2"), filter=column("c3") > lit(0)), - [1.4286, 2.4182, 1.8545, 1.4727, 1.6292], - ), - ( - "var_pop", - f.var_pop(column("c2")), - [1.8367, 2.0886, 1.5556, 2.0247, 1.5238], - ), - ( - "var_pop_w_filter", - f.var_pop(column("c2"), filter=column("c3") > lit(0)), - [1.25, 2.1983, 1.686, 1.3388, 1.5273], - ), - ], -) -def test_aggregate_100(df_aggregate_100, name, expr, expected): - # https://github.com/apache/datafusion/blob/bddb6415a50746d2803dd908d19c3758952d74f9/datafusion/sqllogictest/test_files/aggregate.slt#L1490-L1498 - - df = ( - df_aggregate_100.aggregate( - [column("c1")], - [expr.alias(name)], - ) - .select("c1", f.round(column(name), lit(4)).alias(name)) - 
.sort(column("c1").sort(ascending=True)) - ) - df.show() - - expected_dict = { - "c1": ["a", "b", "c", "d", "e"], - name: expected, - } - - assert df.collect()[0].to_pydict() == expected_dict - - -data_test_bitwise_and_boolean_functions = [ - ("bit_and", f.bit_and(column("a")), [0]), - ("bit_and_filter", f.bit_and(column("a"), filter=column("a") != lit(2)), [1]), - ("bit_or", f.bit_or(column("b")), [6]), - ("bit_or_filter", f.bit_or(column("b"), filter=column("a") != lit(3)), [4]), - ("bit_xor", f.bit_xor(column("c")), [4]), - ("bit_xor_distinct", f.bit_xor(column("b"), distinct=True), [2]), - ("bit_xor_filter", f.bit_xor(column("b"), filter=column("a") != lit(3)), [0]), - ( - "bit_xor_filter_distinct", - f.bit_xor(column("b"), distinct=True, filter=column("a") != lit(3)), - [4], - ), - ("bool_and", f.bool_and(column("d")), [False]), - ("bool_and_filter", f.bool_and(column("d"), filter=column("a") != lit(3)), [True]), - ("bool_or", f.bool_or(column("d")), [True]), - ("bool_or_filter", f.bool_or(column("d"), filter=column("a") == lit(3)), [False]), -] - - -@pytest.mark.parametrize( - ("name", "expr", "result"), data_test_bitwise_and_boolean_functions -) -def test_bit_and_bool_fns(df, name, expr, result): - df = df.aggregate([], [expr.alias(name)]) - - expected = { - name: result, - } - - assert df.collect()[0].to_pydict() == expected - - -@pytest.mark.parametrize( - ("name", "expr", "result"), - [ - ("first_value", f.first_value(column("a")), [0, 4]), - ( - "first_value_ordered", - f.first_value(column("a"), order_by=[column("a").sort(ascending=False)]), - [3, 6], - ), - ( - "first_value_with_null", - f.first_value( - column("b"), - order_by=[column("b").sort(ascending=True)], - null_treatment=NullTreatment.RESPECT_NULLS, - ), - [None, None], - ), - ( - "first_value_no_list_order_by", - f.first_value( - column("b"), - order_by=column("b"), - null_treatment=NullTreatment.RESPECT_NULLS, - ), - [None, None], - ), - ( - "first_value_ignore_null", - f.first_value( - 
column("b"), - order_by=[column("b").sort(ascending=True)], - null_treatment=NullTreatment.IGNORE_NULLS, - ), - [7, 9], - ), - ( - "last_value_ordered", - f.last_value(column("a"), order_by=[column("a").sort(ascending=False)]), - [0, 4], - ), - ( - "last_value_no_list_ordered", - f.last_value(column("a"), order_by=column("a")), - [3, 6], - ), - ( - "last_value_with_null", - f.last_value( - column("b"), - order_by=[column("b").sort(ascending=True, nulls_first=False)], - null_treatment=NullTreatment.RESPECT_NULLS, - ), - [None, None], - ), - ( - "last_value_ignore_null", - f.last_value( - column("b"), - order_by=[column("b").sort(ascending=True)], - null_treatment=NullTreatment.IGNORE_NULLS, - ), - [8, 9], - ), - ( - "nth_value_ordered", - f.nth_value(column("a"), 2, order_by=[column("a").sort(ascending=False)]), - [2, 5], - ), - ( - "nth_value_no_list_ordered", - f.nth_value(column("a"), 2, order_by=column("a").sort(ascending=False)), - [2, 5], - ), - ( - "nth_value_with_null", - f.nth_value( - column("b"), - 3, - order_by=[column("b").sort(ascending=True, nulls_first=False)], - null_treatment=NullTreatment.RESPECT_NULLS, - ), - [8, None], - ), - ( - "nth_value_ignore_null", - f.nth_value( - column("b"), - 2, - order_by=[column("b").sort(ascending=True)], - null_treatment=NullTreatment.IGNORE_NULLS, - ), - [7, 9], - ), - ], -) -def test_first_last_value(df_partitioned, name, expr, result) -> None: - df = df_partitioned.aggregate([column("c")], [expr.alias(name)]).sort(column("c")) - - expected = { - "c": ["A", "B"], - name: result, - } - - assert df.collect()[0].to_pydict() == expected - - -@pytest.mark.parametrize( - ("name", "expr", "result"), - [ - ("string_agg", f.string_agg(column("a"), ","), "one,two,three,two"), - ("string_agg", f.string_agg(column("b"), ""), "03124"), - ( - "string_agg", - f.string_agg(column("a"), ",", filter=column("b") != lit(3)), - "one,three,two", - ), - ( - "string_agg", - f.string_agg(column("a"), ",", order_by=[column("b")]), - 
"one,three,two,two", - ), - ( - "string_agg", - f.string_agg(column("a"), ",", order_by=column("b")), - "one,three,two,two", - ), - ], -) -def test_string_agg(name, expr, result) -> None: - ctx = SessionContext() - - df = ctx.from_pydict( - { - "a": ["one", "two", None, "three", "two"], - "b": [0, 3, 1, 2, 4], - } - ) - - df = df.aggregate([], [expr.alias(name)]) - - expected = { - name: [result], - } - df.show() - assert df.collect()[0].to_pydict() == expected diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py deleted file mode 100644 index 9310da506..000000000 --- a/python/tests/test_catalog.py +++ /dev/null @@ -1,316 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from typing import TYPE_CHECKING - -import datafusion as dfn -import pyarrow as pa -import pyarrow.dataset as ds -import pytest -from datafusion import Catalog, SessionContext, Table, udtf - -if TYPE_CHECKING: - from datafusion.catalog import CatalogProvider, CatalogProviderExportable - - -# Note we take in `database` as a variable even though we don't use -# it because that will cause the fixture to set up the context with -# the tables we need. 
-def test_basic(ctx, database): - with pytest.raises(KeyError): - ctx.catalog("non-existent") - - default = ctx.catalog() - assert default.names() == {"public"} - - for db in [default.schema("public"), default.schema()]: - assert db.names() == {"csv1", "csv", "csv2"} - - table = db.table("csv") - assert table.kind == "physical" - assert table.schema == pa.schema( - [ - pa.field("int", pa.int64(), nullable=True), - pa.field("str", pa.string(), nullable=True), - pa.field("float", pa.float64(), nullable=True), - ] - ) - - -def create_dataset() -> Table: - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - return Table(dataset) - - -class CustomSchemaProvider(dfn.catalog.SchemaProvider): - def __init__(self): - self.tables = {"table1": create_dataset()} - - def table_names(self) -> set[str]: - return set(self.tables.keys()) - - def register_table(self, name: str, table: Table): - self.tables[name] = table - - def deregister_table(self, name, cascade: bool = True): - del self.tables[name] - - def table(self, name: str) -> Table | None: - return self.tables[name] - - def table_exist(self, name: str) -> bool: - return name in self.tables - - -class CustomErrorSchemaProvider(CustomSchemaProvider): - def table(self, name: str) -> Table | None: - message = f"{name} is not an acceptable name" - raise ValueError(message) - - -class CustomCatalogProvider(dfn.catalog.CatalogProvider): - def __init__(self): - self.schemas = {"my_schema": CustomSchemaProvider()} - - def schema_names(self) -> set[str]: - return set(self.schemas.keys()) - - def schema(self, name: str): - return self.schemas[name] - - def register_schema(self, name: str, schema: dfn.catalog.Schema): - self.schemas[name] = schema - - def deregister_schema(self, name, cascade: bool): - del self.schemas[name] - - -class CustomCatalogProviderList(dfn.catalog.CatalogProviderList): - def __init__(self): - self.catalogs = {"my_catalog": 
CustomCatalogProvider()} - - def catalog_names(self) -> set[str]: - return set(self.catalogs.keys()) - - def catalog(self, name: str) -> Catalog | None: - return self.catalogs[name] - - def register_catalog( - self, name: str, catalog: CatalogProviderExportable | CatalogProvider | Catalog - ) -> None: - self.catalogs[name] = catalog - - -def test_python_catalog_provider_list(ctx: SessionContext): - ctx.register_catalog_provider_list(CustomCatalogProviderList()) - - # Ensure `datafusion` catalog does not exist since - # we replaced the catalog list - assert ctx.catalog_names() == {"my_catalog"} - - # Ensure registering works - ctx.register_catalog_provider("second_catalog", Catalog.memory_catalog()) - assert ctx.catalog_names() == {"my_catalog", "second_catalog"} - - -def test_python_catalog_provider(ctx: SessionContext): - ctx.register_catalog_provider("my_catalog", CustomCatalogProvider()) - - # Check the default catalog provider - assert ctx.catalog("datafusion").names() == {"public"} - - my_catalog = ctx.catalog("my_catalog") - assert my_catalog.names() == {"my_schema"} - - my_catalog.register_schema("second_schema", CustomSchemaProvider()) - assert my_catalog.schema_names() == {"my_schema", "second_schema"} - - my_catalog.deregister_schema("my_schema") - assert my_catalog.schema_names() == {"second_schema"} - - -def test_in_memory_providers(ctx: SessionContext): - catalog = dfn.catalog.Catalog.memory_catalog() - ctx.register_catalog_provider("in_mem_catalog", catalog) - - assert ctx.catalog_names() == {"datafusion", "in_mem_catalog"} - - schema = dfn.catalog.Schema.memory_schema() - catalog.register_schema("in_mem_schema", schema) - - schema.register_table("my_table", create_dataset()) - - batches = ctx.sql("select * from in_mem_catalog.in_mem_schema.my_table").collect() - - assert len(batches) == 1 - assert batches[0].column(0) == pa.array([1, 2, 3]) - assert batches[0].column(1) == pa.array([4, 5, 6]) - - -def test_python_schema_provider(ctx: SessionContext): 
- catalog = ctx.catalog() - - catalog.deregister_schema("public") - - catalog.register_schema("test_schema1", CustomSchemaProvider()) - assert catalog.names() == {"test_schema1"} - - catalog.register_schema("test_schema2", CustomSchemaProvider()) - catalog.deregister_schema("test_schema1") - assert catalog.names() == {"test_schema2"} - - -def test_python_table_provider(ctx: SessionContext): - catalog = ctx.catalog() - - catalog.register_schema("custom_schema", CustomSchemaProvider()) - schema = catalog.schema("custom_schema") - - assert schema.table_names() == {"table1"} - - schema.deregister_table("table1") - schema.register_table("table2", create_dataset()) - assert schema.table_names() == {"table2"} - - # Use the default schema instead of our custom schema - - schema = catalog.schema() - - schema.register_table("table3", create_dataset()) - assert schema.table_names() == {"table3"} - - schema.deregister_table("table3") - schema.register_table("table4", create_dataset()) - assert schema.table_names() == {"table4"} - - -def test_schema_register_table_with_pyarrow_dataset(ctx: SessionContext): - schema = ctx.catalog().schema() - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - table_name = "pa_dataset" - - try: - schema.register_table(table_name, dataset) - assert table_name in schema.table_names() - - result = ctx.sql(f"SELECT a, b FROM {table_name}").collect() - - assert len(result) == 1 - assert result[0].column(0) == pa.array([1, 2, 3]) - assert result[0].column(1) == pa.array([4, 5, 6]) - finally: - schema.deregister_table(table_name) - - -def test_exception_not_mangled(ctx: SessionContext): - """Test registering all python providers and running a query against them.""" - - catalog_name = "custom_catalog" - schema_name = "custom_schema" - - ctx.register_catalog_provider(catalog_name, CustomCatalogProvider()) - - catalog = ctx.catalog(catalog_name) - - # Clean out 
previous schemas if they exist so we can start clean - for schema_name in catalog.schema_names(): - catalog.deregister_schema(schema_name, cascade=False) - - catalog.register_schema(schema_name, CustomErrorSchemaProvider()) - - schema = catalog.schema(schema_name) - - for table_name in schema.table_names(): - schema.deregister_table(table_name) - - schema.register_table("test_table", create_dataset()) - - with pytest.raises(ValueError, match=r"^test_table is not an acceptable name$"): - ctx.sql(f"select * from {catalog_name}.{schema_name}.test_table") - - -def test_in_end_to_end_python_providers(ctx: SessionContext): - """Test registering all python providers and running a query against them.""" - - all_catalog_names = [ - "datafusion", - "custom_catalog", - "in_mem_catalog", - ] - - all_schema_names = [ - "custom_schema", - "in_mem_schema", - ] - - ctx.register_catalog_provider(all_catalog_names[1], CustomCatalogProvider()) - ctx.register_catalog_provider( - all_catalog_names[2], dfn.catalog.Catalog.memory_catalog() - ) - - for catalog_name in all_catalog_names: - catalog = ctx.catalog(catalog_name) - - # Clean out previous schemas if they exist so we can start clean - for schema_name in catalog.schema_names(): - catalog.deregister_schema(schema_name, cascade=False) - - catalog.register_schema(all_schema_names[0], CustomSchemaProvider()) - catalog.register_schema(all_schema_names[1], dfn.catalog.Schema.memory_schema()) - - for schema_name in all_schema_names: - schema = catalog.schema(schema_name) - - for table_name in schema.table_names(): - schema.deregister_table(table_name) - - schema.register_table("test_table", create_dataset()) - - for catalog_name in all_catalog_names: - for schema_name in all_schema_names: - table_full_name = f"{catalog_name}.{schema_name}.test_table" - - batches = ctx.sql(f"select * from {table_full_name}").collect() - - assert len(batches) == 1 - assert batches[0].column(0) == pa.array([1, 2, 3]) - assert batches[0].column(1) == 
pa.array([4, 5, 6]) - - -def test_register_python_function_as_udtf(ctx: SessionContext): - basic_table = Table(ctx.sql("SELECT 3 AS value")) - - @udtf("my_table_function") - def my_table_function_udtf() -> Table: - return basic_table - - ctx.register_udtf(my_table_function_udtf) - - result = ctx.sql("SELECT * FROM my_table_function()").collect() - assert len(result) == 1 - assert len(result[0]) == 1 - assert len(result[0][0]) == 1 - assert result[0][0][0].as_py() == 3 diff --git a/python/tests/test_concurrency.py b/python/tests/test_concurrency.py deleted file mode 100644 index f790f9473..000000000 --- a/python/tests/test_concurrency.py +++ /dev/null @@ -1,126 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -from concurrent.futures import ThreadPoolExecutor - -import pyarrow as pa -from datafusion import Config, SessionContext, col, lit -from datafusion import functions as f -from datafusion.common import SqlSchema - - -def _run_in_threads(fn, count: int = 8) -> None: - with ThreadPoolExecutor(max_workers=count) as executor: - futures = [executor.submit(fn, i) for i in range(count)] - for future in futures: - # Propagate any exception raised in the worker thread. 
- future.result() - - -def test_concurrent_access_to_shared_structures() -> None: - """Exercise SqlSchema, Config, and DataFrame concurrently.""" - - schema = SqlSchema("concurrency") - config = Config() - ctx = SessionContext() - - batch = pa.record_batch([pa.array([1, 2, 3], type=pa.int32())], names=["value"]) - df = ctx.create_dataframe([[batch]]) - - config_key = "datafusion.execution.batch_size" - expected_rows = batch.num_rows - - def worker(index: int) -> None: - schema.name = f"concurrency-{index}" - assert schema.name.startswith("concurrency-") - # Exercise getters that use internal locks. - assert isinstance(schema.tables, list) - assert isinstance(schema.views, list) - assert isinstance(schema.functions, list) - - config.set(config_key, str(1024 + index)) - assert config.get(config_key) is not None - # Access the full config map to stress lock usage. - assert config_key in config.get_all() - - batches = df.collect() - assert sum(batch.num_rows for batch in batches) == expected_rows - - _run_in_threads(worker, count=12) - - -def test_config_set_during_get_all() -> None: - """Ensure config writes proceed while another thread reads all entries.""" - - config = Config() - key = "datafusion.execution.batch_size" - - def reader() -> None: - for _ in range(200): - # get_all should not hold the lock while converting to Python objects - config.get_all() - - def writer() -> None: - for index in range(200): - config.set(key, str(1024 + index)) - - with ThreadPoolExecutor(max_workers=2) as executor: - reader_future = executor.submit(reader) - writer_future = executor.submit(writer) - reader_future.result(timeout=10) - writer_future.result(timeout=10) - - assert config.get(key) is not None - - -def test_case_builder_reuse_from_multiple_threads() -> None: - """Ensure the case builder can be safely reused across threads.""" - - ctx = SessionContext() - values = pa.array([0, 1, 2, 3, 4], type=pa.int32()) - df = ctx.create_dataframe([[pa.record_batch([values], 
names=["value"])]]) - - base_builder = f.case(col("value")) - - def add_case(i: int) -> None: - nonlocal base_builder - base_builder = base_builder.when(lit(i), lit(f"value-{i}")) - - _run_in_threads(add_case, count=8) - - with ThreadPoolExecutor(max_workers=2) as executor: - otherwise_future = executor.submit(base_builder.otherwise, lit("default")) - case_expr = otherwise_future.result() - - result = df.select(case_expr.alias("label")).collect() - assert sum(batch.num_rows for batch in result) == len(values) - - predicate_builder = f.when(col("value") == lit(0), lit("zero")) - - def add_predicate(i: int) -> None: - predicate_builder.when(col("value") == lit(i + 1), lit(f"value-{i + 1}")) - - _run_in_threads(add_predicate, count=4) - - with ThreadPoolExecutor(max_workers=2) as executor: - end_future = executor.submit(predicate_builder.end) - predicate_expr = end_future.result() - - result = df.select(predicate_expr.alias("label")).collect() - assert sum(batch.num_rows for batch in result) == len(values) diff --git a/python/tests/test_config.py b/python/tests/test_config.py deleted file mode 100644 index c1d7f97e1..000000000 --- a/python/tests/test_config.py +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest -from datafusion import Config - - -@pytest.fixture -def config(): - return Config() - - -def test_get_then_set(config): - config_key = "datafusion.optimizer.filter_null_join_keys" - - assert config.get(config_key) == "false" - - config.set(config_key, "true") - assert config.get(config_key) == "true" - - -def test_get_all(config): - config_dict = config.get_all() - assert config_dict["datafusion.catalog.create_default_catalog_and_schema"] == "true" - - -def test_get_invalid_config(config): - assert config.get("not.valid.key") is None diff --git a/python/tests/test_context.py b/python/tests/test_context.py deleted file mode 100644 index 5df6ed20f..000000000 --- a/python/tests/test_context.py +++ /dev/null @@ -1,872 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import datetime as dt -import gzip -import pathlib - -import pyarrow as pa -import pyarrow.dataset as ds -import pytest -from datafusion import ( - CsvReadOptions, - DataFrame, - RuntimeEnvBuilder, - SessionConfig, - SessionContext, - SQLOptions, - Table, - column, - literal, -) - - -def test_create_context_no_args(): - SessionContext() - - -def test_create_context_session_config_only(): - SessionContext(config=SessionConfig()) - - -def test_create_context_runtime_config_only(): - SessionContext(runtime=RuntimeEnvBuilder()) - - -@pytest.mark.parametrize("path_to_str", [True, False]) -def test_runtime_configs(tmp_path, path_to_str): - path1 = tmp_path / "dir1" - path2 = tmp_path / "dir2" - - path1 = str(path1) if path_to_str else path1 - path2 = str(path2) if path_to_str else path2 - - runtime = RuntimeEnvBuilder().with_disk_manager_specified(path1, path2) - config = SessionConfig().with_default_catalog_and_schema("foo", "bar") - ctx = SessionContext(config, runtime) - assert ctx is not None - - db = ctx.catalog("foo").schema("bar") - assert db is not None - - -@pytest.mark.parametrize("path_to_str", [True, False]) -def test_temporary_files(tmp_path, path_to_str): - path = str(tmp_path) if path_to_str else tmp_path - - runtime = RuntimeEnvBuilder().with_temp_file_path(path) - config = SessionConfig().with_default_catalog_and_schema("foo", "bar") - ctx = SessionContext(config, runtime) - assert ctx is not None - - db = ctx.catalog("foo").schema("bar") - assert db is not None - - -def test_create_context_with_all_valid_args(): - runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) - config = ( - SessionConfig() - .with_create_default_catalog_and_schema(enabled=True) - .with_default_catalog_and_schema("foo", "bar") - .with_target_partitions(1) - .with_information_schema(enabled=True) - .with_repartition_joins(enabled=False) - .with_repartition_aggregations(enabled=False) - .with_repartition_windows(enabled=False) - 
.with_parquet_pruning(enabled=False) - ) - - ctx = SessionContext(config, runtime) - - # verify that at least some of the arguments worked - ctx.catalog("foo").schema("bar") - with pytest.raises(KeyError): - ctx.catalog("datafusion") - - -def test_register_record_batches(ctx): - # create a RecordBatch and register it as memtable - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - ctx.register_record_batches("t", [[batch]]) - - assert ctx.catalog().schema().names() == {"t"} - - result = ctx.sql("SELECT a+b, a-b FROM t").collect() - - assert result[0].column(0) == pa.array([5, 7, 9]) - assert result[0].column(1) == pa.array([-3, -3, -3]) - - -def test_create_dataframe_registers_unique_table_name(ctx): - # create a RecordBatch and register it as memtable - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - df = ctx.create_dataframe([[batch]]) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert len(tables[0]) == 33 - assert tables[0].startswith("c") - # ensure that the rest of the table name contains - # only hexadecimal numbers - for c in tables[0][1:]: - assert c in "0123456789abcdef" - - -def test_create_dataframe_registers_with_defined_table_name(ctx): - # create a RecordBatch and register it as memtable - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - df = ctx.create_dataframe([[batch]], name="tbl") - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert tables[0] == "tbl" - - -def test_from_arrow_table(ctx): - # create a PyArrow table - data = {"a": [1, 2, 3], "b": [4, 5, 6]} - table = pa.Table.from_pydict(data) - - # convert to DataFrame - df = ctx.from_arrow(table) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert 
set(df.schema().names) == {"a", "b"} - assert df.collect()[0].num_rows == 3 - - -def record_batch_generator(num_batches: int): - schema = pa.schema([("a", pa.int64()), ("b", pa.int64())]) - for _i in range(num_batches): - yield pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], schema=schema - ) - - -@pytest.mark.parametrize( - "source", - [ - # __arrow_c_array__ sources - pa.array([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]), - # __arrow_c_stream__ sources - pa.RecordBatch.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}), - pa.RecordBatchReader.from_batches( - pa.schema([("a", pa.int64()), ("b", pa.int64())]), record_batch_generator(1) - ), - pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}), - ], -) -def test_from_arrow_sources(ctx, source) -> None: - df = ctx.from_arrow(source) - assert df - assert isinstance(df, DataFrame) - assert df.schema().names == ["a", "b"] - assert df.count() == 3 - - -def test_from_arrow_table_with_name(ctx): - # create a PyArrow table - data = {"a": [1, 2, 3], "b": [4, 5, 6]} - table = pa.Table.from_pydict(data) - - # convert to DataFrame with optional name - df = ctx.from_arrow(table, name="tbl") - tables = list(ctx.catalog().schema().names()) - - assert df - assert tables[0] == "tbl" - - -def test_from_arrow_table_empty(ctx): - data = {"a": [], "b": []} - schema = pa.schema([("a", pa.int32()), ("b", pa.string())]) - table = pa.Table.from_pydict(data, schema=schema) - - # convert to DataFrame - df = ctx.from_arrow(table) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert len(df.collect()) == 0 - - -def test_from_arrow_table_empty_no_schema(ctx): - data = {"a": [], "b": []} - table = pa.Table.from_pydict(data) - - # convert to DataFrame - df = ctx.from_arrow(table) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, 
DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert len(df.collect()) == 0 - - -def test_from_pylist(ctx): - # create a dataframe from Python list - data = [ - {"a": 1, "b": 4}, - {"a": 2, "b": 5}, - {"a": 3, "b": 6}, - ] - - df = ctx.from_pylist(data) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert df.collect()[0].num_rows == 3 - - -def test_from_pydict(ctx): - # create a dataframe from Python dictionary - data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - df = ctx.from_pydict(data) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert df.collect()[0].num_rows == 3 - - -def test_from_pandas(ctx): - # create a dataframe from pandas dataframe - pd = pytest.importorskip("pandas") - data = {"a": [1, 2, 3], "b": [4, 5, 6]} - pandas_df = pd.DataFrame(data) - - df = ctx.from_pandas(pandas_df) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert df.collect()[0].num_rows == 3 - - -def test_from_polars(ctx): - # create a dataframe from Polars dataframe - pd = pytest.importorskip("polars") - data = {"a": [1, 2, 3], "b": [4, 5, 6]} - polars_df = pd.DataFrame(data) - - df = ctx.from_polars(polars_df) - tables = list(ctx.catalog().schema().names()) - - assert df - assert len(tables) == 1 - assert isinstance(df, DataFrame) - assert set(df.schema().names) == {"a", "b"} - assert df.collect()[0].num_rows == 3 - - -def test_register_table(ctx, database): - default = ctx.catalog() - public = default.schema("public") - assert public.names() == {"csv", "csv1", "csv2"} - table = public.table("csv") - - ctx.register_table("csv3", table) - assert public.names() == {"csv", "csv1", "csv2", "csv3"} - - -def 
test_read_table_from_catalog(ctx, database): - default = ctx.catalog() - public = default.schema("public") - assert public.names() == {"csv", "csv1", "csv2"} - - table = public.table("csv") - table_df = ctx.read_table(table) - table_df.show() - - -def test_read_table_from_df(ctx): - df = ctx.from_pydict({"a": [1, 2]}) - result = ctx.read_table(df).collect() - assert [b.to_pydict() for b in result] == [{"a": [1, 2]}] - - -def test_read_table_from_dataset(ctx): - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - - result = ctx.read_table(dataset).collect() - - assert result[0].column(0) == pa.array([1, 2, 3]) - assert result[0].column(1) == pa.array([4, 5, 6]) - - -def test_deregister_table(ctx, database): - default = ctx.catalog() - public = default.schema("public") - assert public.names() == {"csv", "csv1", "csv2"} - - ctx.deregister_table("csv") - assert public.names() == {"csv1", "csv2"} - - -def test_register_table_from_dataframe(ctx): - df = ctx.from_pydict({"a": [1, 2]}) - ctx.register_table("df_tbl", df) - result = ctx.sql("SELECT * FROM df_tbl").collect() - assert [b.to_pydict() for b in result] == [{"a": [1, 2]}] - - -@pytest.mark.parametrize("temporary", [True, False]) -def test_register_table_from_dataframe_into_view(ctx, temporary): - df = ctx.from_pydict({"a": [1, 2]}) - table = df.into_view(temporary=temporary) - assert isinstance(table, Table) - if temporary: - assert table.kind == "temporary" - else: - assert table.kind == "view" - - ctx.register_table("view_tbl", table) - result = ctx.sql("SELECT * FROM view_tbl").collect() - assert [b.to_pydict() for b in result] == [{"a": [1, 2]}] - - -def test_table_from_dataframe(ctx): - df = ctx.from_pydict({"a": [1, 2]}) - table = Table(df) - assert isinstance(table, Table) - ctx.register_table("from_dataframe_tbl", table) - result = ctx.sql("SELECT * FROM from_dataframe_tbl").collect() - assert [b.to_pydict() for b in 
result] == [{"a": [1, 2]}] - - -def test_table_from_dataframe_internal(ctx): - df = ctx.from_pydict({"a": [1, 2]}) - table = Table(df.df) - assert isinstance(table, Table) - ctx.register_table("from_internal_dataframe_tbl", table) - result = ctx.sql("SELECT * FROM from_internal_dataframe_tbl").collect() - assert [b.to_pydict() for b in result] == [{"a": [1, 2]}] - - -def test_register_dataset(ctx): - # create a RecordBatch and register it as a pyarrow.dataset.Dataset - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - - assert ctx.catalog().schema().names() == {"t"} - - result = ctx.sql("SELECT a+b, a-b FROM t").collect() - - assert result[0].column(0) == pa.array([5, 7, 9]) - assert result[0].column(1) == pa.array([-3, -3, -3]) - - -def test_dataset_filter(ctx, capfd): - # create a RecordBatch and register it as a pyarrow.dataset.Dataset - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - - assert ctx.catalog().schema().names() == {"t"} - df = ctx.sql("SELECT a+b, a-b FROM t WHERE a BETWEEN 2 and 3 AND b > 5") - - # Make sure the filter was pushed down in Physical Plan - df.explain() - captured = capfd.readouterr() - assert "filter_expr=(((a >= 2) and (a <= 3)) and (b > 5))" in captured.out - - result = df.collect() - - assert result[0].column(0) == pa.array([9]) - assert result[0].column(1) == pa.array([-3]) - - -def test_dataset_count(ctx): - # `datafusion-python` issue: https://github.com/apache/datafusion-python/issues/800 - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - - # Testing the dataframe API - df = ctx.table("t") - assert df.count() == 3 - - # Testing the SQL API - count = 
ctx.sql("SELECT COUNT(*) FROM t") - count = count.collect() - assert count[0].column(0) == pa.array([3]) - - -def test_pyarrow_predicate_pushdown_is_null(ctx, capfd): - """Ensure that pyarrow filter gets pushed down for `IsNull`""" - # create a RecordBatch and register it as a pyarrow.dataset.Dataset - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6]), pa.array([7, None, 9])], - names=["a", "b", "c"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - # Make sure the filter was pushed down in Physical Plan - df = ctx.sql("SELECT a FROM t WHERE c is NULL") - df.explain() - captured = capfd.readouterr() - assert "filter_expr=is_null(c, {nan_is_null=false})" in captured.out - - result = df.collect() - assert result[0].column(0) == pa.array([2]) - - -def test_pyarrow_predicate_pushdown_timestamp(ctx, tmpdir, capfd): - """Ensure that pyarrow filter gets pushed down for timestamp""" - # Ref: https://github.com/apache/datafusion-python/issues/703 - - # create pyarrow dataset with no actual files - col_type = pa.timestamp("ns", "+00:00") - nyd_2000 = pa.scalar(dt.datetime(2000, 1, 1, tzinfo=dt.timezone.utc), col_type) - pa_dataset_fs = pa.fs.SubTreeFileSystem(str(tmpdir), pa.fs.LocalFileSystem()) - pa_dataset_format = pa.dataset.ParquetFileFormat() - pa_dataset_partition = pa.dataset.field("a") <= nyd_2000 - fragments = [ - # NOTE: we never actually make this file. - # Working predicate pushdown means it never gets accessed - pa_dataset_format.make_fragment( - "1.parquet", - filesystem=pa_dataset_fs, - partition_expression=pa_dataset_partition, - ) - ] - pa_dataset = pa.dataset.FileSystemDataset( - fragments, - pa.schema([pa.field("a", col_type)]), - pa_dataset_format, - pa_dataset_fs, - ) - - ctx.register_dataset("t", pa_dataset) - - # the partition for our only fragment is for a < 2000-01-01. 
- # so querying for a > 2024-01-01 should not touch any files - df = ctx.sql("SELECT * FROM t WHERE a > '2024-01-01T00:00:00+00:00'") - assert df.collect() == [] - - -def test_dataset_filter_nested_data(ctx): - # create Arrow StructArrays to test nested data types - data = pa.StructArray.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - batch = pa.RecordBatch.from_arrays( - [data], - names=["nested_data"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - - assert ctx.catalog().schema().names() == {"t"} - - df = ctx.table("t") - - # This filter will not be pushed down to DatasetExec since it - # isn't supported - df = df.filter(column("nested_data")["b"] > literal(5)).select( - column("nested_data")["a"] + column("nested_data")["b"], - column("nested_data")["a"] - column("nested_data")["b"], - ) - - result = df.collect() - - assert result[0].column(0) == pa.array([9]) - assert result[0].column(1) == pa.array([-3]) - - -def test_table_exist(ctx): - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset("t", dataset) - - assert ctx.table_exist("t") is True - - -def test_table_not_found(ctx): - from uuid import uuid4 - - with pytest.raises(KeyError): - ctx.table(f"not-found-{uuid4()}") - - -def test_read_json(ctx): - path = pathlib.Path(__file__).parent.resolve() - - # Default - test_data_path = path / "data_test_context" / "data.json" - df = ctx.read_json(test_data_path) - result = df.collect() - - assert result[0].column(0) == pa.array(["a", "b", "c"]) - assert result[0].column(1) == pa.array([1, 2, 3]) - - # Schema - schema = pa.schema( - [ - pa.field("A", pa.string(), nullable=True), - ] - ) - df = ctx.read_json(test_data_path, schema=schema) - result = df.collect() - - assert result[0].column(0) == pa.array(["a", "b", "c"]) - assert result[0].schema == schema - - # File extension - test_data_path = 
path / "data_test_context" / "data.json" - df = ctx.read_json(test_data_path, file_extension=".json") - result = df.collect() - - assert result[0].column(0) == pa.array(["a", "b", "c"]) - assert result[0].column(1) == pa.array([1, 2, 3]) - - -def test_read_json_compressed(ctx, tmp_path): - path = pathlib.Path(__file__).parent.resolve() - test_data_path = path / "data_test_context" / "data.json" - - # File compression type - gzip_path = tmp_path / "data.json.gz" - - with ( - pathlib.Path.open(test_data_path, "rb") as csv_file, - gzip.open(gzip_path, "wb") as gzipped_file, - ): - gzipped_file.writelines(csv_file) - - df = ctx.read_json(gzip_path, file_extension=".gz", file_compression_type="gz") - result = df.collect() - - assert result[0].column(0) == pa.array(["a", "b", "c"]) - assert result[0].column(1) == pa.array([1, 2, 3]) - - -def test_read_csv(ctx): - csv_df = ctx.read_csv(path="testing/data/csv/aggregate_test_100.csv") - csv_df.select(column("c1")).show() - - -def test_read_csv_list(ctx): - csv_df = ctx.read_csv(path=["testing/data/csv/aggregate_test_100.csv"]) - expected = csv_df.count() * 2 - - double_csv_df = ctx.read_csv( - path=[ - "testing/data/csv/aggregate_test_100.csv", - "testing/data/csv/aggregate_test_100.csv", - ] - ) - actual = double_csv_df.count() - - double_csv_df.select(column("c1")).show() - assert actual == expected - - -def test_read_csv_compressed(ctx, tmp_path): - test_data_path = pathlib.Path("testing/data/csv/aggregate_test_100.csv") - - expected = ctx.read_csv(test_data_path).collect() - - # File compression type - gzip_path = tmp_path / "aggregate_test_100.csv.gz" - - with ( - pathlib.Path.open(test_data_path, "rb") as csv_file, - gzip.open(gzip_path, "wb") as gzipped_file, - ): - gzipped_file.writelines(csv_file) - - csv_df = ctx.read_csv(gzip_path, file_extension=".gz", file_compression_type="gz") - assert csv_df.collect() == expected - - csv_df = ctx.read_csv( - gzip_path, - options=CsvReadOptions(file_extension=".gz", 
file_compression_type="gz"), - ) - assert csv_df.collect() == expected - - -def test_read_parquet(ctx): - parquet_df = ctx.read_parquet(path="parquet/data/alltypes_plain.parquet") - parquet_df.show() - assert parquet_df is not None - - path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" - parquet_df = ctx.read_parquet(path=path) - assert parquet_df is not None - - -def test_read_avro(ctx): - avro_df = ctx.read_avro(path="testing/data/avro/alltypes_plain.avro") - avro_df.show() - assert avro_df is not None - - path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" - avro_df = ctx.read_avro(path=path) - assert avro_df is not None - - -def test_create_sql_options(): - SQLOptions() - - -def test_sql_with_options_no_ddl(ctx): - sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')" - ctx.sql(sql) - options = SQLOptions().with_allow_ddl(allow=False) - with pytest.raises(Exception, match="DDL"): - ctx.sql_with_options(sql, options=options) - - -def test_sql_with_options_no_dml(ctx): - table_name = "t" - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - dataset = ds.dataset([batch]) - ctx.register_dataset(table_name, dataset) - sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);' - ctx.sql(sql) - options = SQLOptions().with_allow_dml(allow=False) - with pytest.raises(Exception, match="DML"): - ctx.sql_with_options(sql, options=options) - - -def test_sql_with_options_no_statements(ctx): - sql = "SET time zone = 1;" - ctx.sql(sql) - options = SQLOptions().with_allow_statements(allow=False) - with pytest.raises(Exception, match="SetVariable"): - ctx.sql_with_options(sql, options=options) - - -@pytest.fixture -def batch(): - return pa.RecordBatch.from_arrays( - [pa.array([4, 5, 6])], - names=["a"], - ) - - -def test_create_dataframe_with_global_ctx(batch): - ctx = SessionContext.global_ctx() - - df = ctx.create_dataframe([[batch]]) - - result = 
df.collect()[0].column(0) - - assert result == pa.array([4, 5, 6]) - - -def test_csv_read_options_builder_pattern(): - """Test CsvReadOptions builder pattern.""" - from datafusion import CsvReadOptions - - options = ( - CsvReadOptions() - .with_has_header(False) # noqa: FBT003 - .with_delimiter("|") - .with_quote("'") - .with_schema_infer_max_records(2000) - .with_truncated_rows(True) # noqa: FBT003 - .with_newlines_in_values(True) # noqa: FBT003 - .with_file_extension(".tsv") - ) - assert options.has_header is False - assert options.delimiter == "|" - assert options.quote == "'" - assert options.schema_infer_max_records == 2000 - assert options.truncated_rows is True - assert options.newlines_in_values is True - assert options.file_extension == ".tsv" - - -def read_csv_with_options_inner( - tmp_path: pathlib.Path, - csv_content: str, - options: CsvReadOptions, - expected: pa.RecordBatch, - as_read: bool, - global_ctx: bool, -) -> None: - from datafusion import SessionContext - - # Create a test CSV file - group_dir = tmp_path / "group=a" - group_dir.mkdir(exist_ok=True) - - csv_path = group_dir / "test.csv" - csv_path.write_text(csv_content, newline="\n") - - ctx = SessionContext() - - if as_read: - if global_ctx: - from datafusion.io import read_csv - - df = read_csv(str(tmp_path), options=options) - else: - df = ctx.read_csv(str(tmp_path), options=options) - else: - ctx.register_csv("test_table", str(tmp_path), options=options) - df = ctx.sql("SELECT * FROM test_table") - df.show() - - # Verify the data - result = df.collect() - assert len(result) == 1 - assert result[0] == expected - - -@pytest.mark.parametrize( - ("as_read", "global_ctx"), - [ - (True, True), - (True, False), - (False, False), - ], -) -def test_read_csv_with_options(tmp_path, as_read, global_ctx): - """Test reading CSV with CsvReadOptions.""" - - csv_content = "Alice;30;|New York; NY|\nBob;25\n#Charlie;35;Paris\nPhil;75;Detroit' MI\nKarin;50;|Stockholm\nSweden|" # noqa: E501 - - # Some of the 
read options are difficult to test in combination - # such as schema and schema_infer_max_records so run multiple tests - # file_sort_order doesn't impact reading, but included here to ensure - # all options parse correctly - options = CsvReadOptions( - has_header=False, - delimiter=";", - quote="|", - terminator="\n", - escape="\\", - comment="#", - newlines_in_values=True, - schema_infer_max_records=1, - null_regex="[pP]+aris", - truncated_rows=True, - file_sort_order=[[column("column_1").sort(), column("column_2")], ["column_3"]], - ) - - expected = pa.RecordBatch.from_arrays( - [ - pa.array(["Alice", "Bob", "Phil", "Karin"]), - pa.array([30, 25, 75, 50]), - pa.array(["New York; NY", None, "Detroit' MI", "Stockholm\nSweden"]), - ], - names=["column_1", "column_2", "column_3"], - ) - - read_csv_with_options_inner( - tmp_path, csv_content, options, expected, as_read, global_ctx - ) - - schema = pa.schema( - [ - pa.field("name", pa.string(), nullable=False), - pa.field("age", pa.float32(), nullable=False), - pa.field("location", pa.string(), nullable=True), - ] - ) - options.with_schema(schema) - - expected = pa.RecordBatch.from_arrays( - [ - pa.array(["Alice", "Bob", "Phil", "Karin"]), - pa.array([30.0, 25.0, 75.0, 50.0]), - pa.array(["New York; NY", None, "Detroit' MI", "Stockholm\nSweden"]), - ], - schema=schema, - ) - - read_csv_with_options_inner( - tmp_path, csv_content, options, expected, as_read, global_ctx - ) - - csv_content = "name,age\nAlice,30\nBob,25\nCharlie,35\nDiego,40\nEmily,15" - - expected = pa.RecordBatch.from_arrays( - [ - pa.array(["Alice", "Bob", "Charlie", "Diego", "Emily"]), - pa.array([30, 25, 35, 40, 15]), - pa.array(["a", "a", "a", "a", "a"]), - ], - schema=pa.schema( - [ - pa.field("name", pa.string(), nullable=True), - pa.field("age", pa.int64(), nullable=True), - pa.field("group", pa.string(), nullable=False), - ] - ), - ) - options = CsvReadOptions( - table_partition_cols=[("group", pa.string())], - ) - - 
read_csv_with_options_inner( - tmp_path, csv_content, options, expected, as_read, global_ctx - ) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py deleted file mode 100644 index 759d6278c..000000000 --- a/python/tests/test_dataframe.py +++ /dev/null @@ -1,3571 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import ctypes -import datetime -import itertools -import os -import re -import threading -import time -from pathlib import Path -from typing import Any - -import pyarrow as pa -import pyarrow.parquet as pq -import pytest -from datafusion import ( - DataFrame, - InsertOp, - ParquetColumnOptions, - ParquetWriterOptions, - RecordBatch, - SessionContext, - WindowFrame, - column, - literal, - udf, -) -from datafusion import ( - col as df_col, -) -from datafusion import ( - functions as f, -) -from datafusion.dataframe import DataFrameWriteOptions -from datafusion.dataframe_formatter import ( - DataFrameHtmlFormatter, - configure_formatter, - get_formatter, - reset_formatter, -) -from datafusion.expr import EXPR_TYPE_ERROR, Window -from pyarrow.csv import write_csv - -pa_cffi = pytest.importorskip("pyarrow.cffi") - -MB = 1024 * 1024 - - -@pytest.fixture -def ctx(): - return SessionContext() - - -@pytest.fixture -def df(ctx): - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6]), pa.array([8, 5, 8])], - names=["a", "b", "c"], - ) - - return ctx.from_arrow(batch) - - -@pytest.fixture -def large_df(): - ctx = SessionContext() - - rows = 100000 - data = { - "a": list(range(rows)), - "b": [f"s-{i}" for i in range(rows)], - "c": [float(i + 0.1) for i in range(rows)], - } - batch = pa.record_batch(data) - - return ctx.from_arrow(batch) - - -@pytest.fixture -def large_multi_batch_df(): - """Create a DataFrame with multiple record batches for testing stream behavior. - - This fixture creates 10 batches of 10,000 rows each (100,000 rows total), - ensuring the DataFrame spans multiple batches. This is essential for testing - that memory limits actually cause early stream termination rather than - truncating all collected data. 
- """ - ctx = SessionContext() - - # Create multiple batches, each with 10,000 rows - batches = [] - rows_per_batch = 10000 - num_batches = 10 - - for batch_idx in range(num_batches): - start_row = batch_idx * rows_per_batch - end_row = start_row + rows_per_batch - data = { - "a": list(range(start_row, end_row)), - "b": [f"s-{i}" for i in range(start_row, end_row)], - "c": [float(i + 0.1) for i in range(start_row, end_row)], - } - batch = pa.record_batch(data) - batches.append(batch) - - # Register as record batches to maintain multi-batch structure - # Using [batches] wraps list in another list as required by register_record_batches - ctx.register_record_batches("large_multi_batch_data", [batches]) - return ctx.table("large_multi_batch_data") - - -@pytest.fixture -def struct_df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([{"c": 1}, {"c": 2}, {"c": 3}]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def nested_df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - # Intentionally make each array of different length - batch = pa.RecordBatch.from_arrays( - [pa.array([[1], [2, 3], [4, 5, 6], None]), pa.array([7, 8, 9, 10])], - names=["a", "b"], - ) - - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def aggregate_df(): - ctx = SessionContext() - ctx.register_csv("test", "testing/data/csv/aggregate_test_100.csv") - return ctx.sql("select c1, sum(c2) from test group by c1") - - -@pytest.fixture -def partitioned_df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [ - pa.array([0, 1, 2, 3, 4, 5, 6]), - pa.array([7, None, 7, 8, 9, None, 9]), - pa.array(["A", "A", "A", "A", "B", "B", "B"]), - ], - names=["a", "b", "c"], - ) - - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def 
clean_formatter_state(): - """Reset the HTML formatter after each test.""" - reset_formatter() - - -@pytest.fixture -def null_df(): - """Create a DataFrame with null values of different types.""" - ctx = SessionContext() - - # Create a RecordBatch with nulls across different types - batch = pa.RecordBatch.from_arrays( - [ - pa.array([1, None, 3, None], type=pa.int64()), - pa.array([4.5, 6.7, None, None], type=pa.float64()), - pa.array(["a", None, "c", None], type=pa.string()), - pa.array([True, None, False, None], type=pa.bool_()), - pa.array( - [10957, None, 18993, None], type=pa.date32() - ), # 2000-01-01, null, 2022-01-01, null - pa.array( - [946684800000, None, 1640995200000, None], type=pa.date64() - ), # 2000-01-01, null, 2022-01-01, null - ], - names=[ - "int_col", - "float_col", - "str_col", - "bool_col", - "date32_col", - "date64_col", - ], - ) - - return ctx.create_dataframe([[batch]]) - - -# custom style for testing with html formatter -class CustomStyleProvider: - def get_cell_style(self) -> str: - return ( - "background-color: #f5f5f5; color: #333; padding: 8px; border: " - "1px solid #ddd;" - ) - - def get_header_style(self) -> str: - return ( - "background-color: #4285f4; color: white; font-weight: bold; " - "padding: 10px; border: 1px solid #3367d6;" - ) - - -def count_table_rows(html_content: str) -> int: - """Count the number of table rows in HTML content. 
- Args: - html_content: HTML string to analyze - Returns: - Number of table rows found (number of tags) - """ - return len(re.findall(r" literal(2)).select( - column("a") + column("b"), - column("a") - column("b"), - ) - - # execute and collect the first (and only) batch - result = df1.collect()[0] - - assert result.column(0) == pa.array([9]) - assert result.column(1) == pa.array([-3]) - - df.show() - # verify that if there is no filter applied, internal dataframe is unchanged - df2 = df.filter() - assert df.df == df2.df - - df3 = df.filter(column("a") > literal(1), column("b") != literal(6)) - result = df3.collect()[0] - - assert result.column(0) == pa.array([2]) - assert result.column(1) == pa.array([5]) - assert result.column(2) == pa.array([5]) - - -def test_filter_string_predicates(df): - df_str = df.filter("a > 2") - result = df_str.collect()[0] - - assert result.column(0) == pa.array([3]) - assert result.column(1) == pa.array([6]) - assert result.column(2) == pa.array([8]) - - df_mixed = df.filter("a > 1", column("b") != literal(6)) - result_mixed = df_mixed.collect()[0] - - assert result_mixed.column(0) == pa.array([2]) - assert result_mixed.column(1) == pa.array([5]) - assert result_mixed.column(2) == pa.array([5]) - - df_strings = df.filter("a > 1", "b < 6") - result_strings = df_strings.collect()[0] - - assert result_strings.column(0) == pa.array([2]) - assert result_strings.column(1) == pa.array([5]) - assert result_strings.column(2) == pa.array([5]) - - -def test_parse_sql_expr(df): - plan1 = df.filter(df.parse_sql_expr("a > 2")).logical_plan() - plan2 = df.filter(column("a") > literal(2)).logical_plan() - # object equality not implemented but string representation should match - assert str(plan1) == str(plan2) - - df1 = df.filter(df.parse_sql_expr("a > 2")).select( - column("a") + column("b"), - column("a") - column("b"), - ) - - # execute and collect the first (and only) batch - result = df1.collect()[0] - - assert result.column(0) == pa.array([9]) - 
assert result.column(1) == pa.array([-3]) - - df.show() - # verify that if there is no filter applied, internal dataframe is unchanged - df2 = df.filter() - assert df.df == df2.df - - df3 = df.filter(df.parse_sql_expr("a > 1"), df.parse_sql_expr("b != 6")) - result = df3.collect()[0] - - assert result.column(0) == pa.array([2]) - assert result.column(1) == pa.array([5]) - assert result.column(2) == pa.array([5]) - - -def test_show_empty(df, capsys): - df_empty = df.filter(column("a") > literal(3)) - df_empty.show() - captured = capsys.readouterr() - assert "DataFrame has no rows" in captured.out - - -def test_sort(df): - df = df.sort(column("b").sort(ascending=False)) - - table = pa.Table.from_batches(df.collect()) - expected = {"a": [3, 2, 1], "b": [6, 5, 4], "c": [8, 5, 8]} - - assert table.to_pydict() == expected - - -def test_sort_string_and_expression_equivalent(df): - from datafusion import col - - result_str = df.sort("a").to_pydict() - result_expr = df.sort(col("a")).to_pydict() - assert result_str == result_expr - - -def test_sort_unsupported(df): - with pytest.raises( - TypeError, - match=f"Expected Expr or column name.*{re.escape(EXPR_TYPE_ERROR)}", - ): - df.sort(1) - - -def test_aggregate_string_and_expression_equivalent(df): - from datafusion import col - - result_str = df.aggregate("a", [f.count()]).sort("a").to_pydict() - result_expr = df.aggregate(col("a"), [f.count()]).sort("a").to_pydict() - assert result_str == result_expr - - -def test_aggregate_tuple_group_by(df): - result_list = df.aggregate(["a"], [f.count()]).sort("a").to_pydict() - result_tuple = df.aggregate(("a",), [f.count()]).sort("a").to_pydict() - assert result_tuple == result_list - - -def test_aggregate_tuple_aggs(df): - result_list = df.aggregate("a", [f.count()]).sort("a").to_pydict() - result_tuple = df.aggregate("a", (f.count(),)).sort("a").to_pydict() - assert result_tuple == result_list - - -def test_filter_string_equivalent(df): - df1 = df.filter("a > 1").to_pydict() - df2 = 
df.filter(column("a") > literal(1)).to_pydict() - assert df1 == df2 - - -def test_filter_string_invalid(df): - with pytest.raises(Exception) as excinfo: - df.filter("this is not valid sql").collect() - assert "Expected Expr" not in str(excinfo.value) - - -def test_drop(df): - df = df.drop("c") - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert df.schema().names == ["a", "b"] - assert result.column(0) == pa.array([1, 2, 3]) - assert result.column(1) == pa.array([4, 5, 6]) - - -def test_limit(df): - df = df.limit(1) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert len(result.column(0)) == 1 - assert len(result.column(1)) == 1 - - -def test_limit_with_offset(df): - # only 3 rows, but limit past the end to ensure that offset is working - df = df.limit(5, offset=2) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert len(result.column(0)) == 1 - assert len(result.column(1)) == 1 - - -def test_head(df): - df = df.head(1) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([1]) - assert result.column(1) == pa.array([4]) - assert result.column(2) == pa.array([8]) - - -def test_tail(df): - df = df.tail(1) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([3]) - assert result.column(1) == pa.array([6]) - assert result.column(2) == pa.array([8]) - - -def test_with_column_sql_expression(df): - df = df.with_column("c", "a + b") - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.schema.field(0).name == "a" - assert result.schema.field(1).name == "b" - assert result.schema.field(2).name == "c" - - assert result.column(0) == pa.array([1, 2, 3]) - assert result.column(1) == pa.array([4, 5, 6]) - assert result.column(2) == pa.array([5, 7, 9]) - - -def test_with_column(df): - df = 
df.with_column("c", column("a") + column("b")) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.schema.field(0).name == "a" - assert result.schema.field(1).name == "b" - assert result.schema.field(2).name == "c" - - assert result.column(0) == pa.array([1, 2, 3]) - assert result.column(1) == pa.array([4, 5, 6]) - assert result.column(2) == pa.array([5, 7, 9]) - - -def test_with_columns(df): - df = df.with_columns( - (column("a") + column("b")).alias("c"), - (column("a") + column("b")).alias("d"), - [ - (column("a") + column("b")).alias("e"), - (column("a") + column("b")).alias("f"), - ], - g=(column("a") + column("b")), - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.schema.field(0).name == "a" - assert result.schema.field(1).name == "b" - assert result.schema.field(2).name == "c" - assert result.schema.field(3).name == "d" - assert result.schema.field(4).name == "e" - assert result.schema.field(5).name == "f" - assert result.schema.field(6).name == "g" - - assert result.column(0) == pa.array([1, 2, 3]) - assert result.column(1) == pa.array([4, 5, 6]) - assert result.column(2) == pa.array([5, 7, 9]) - assert result.column(3) == pa.array([5, 7, 9]) - assert result.column(4) == pa.array([5, 7, 9]) - assert result.column(5) == pa.array([5, 7, 9]) - assert result.column(6) == pa.array([5, 7, 9]) - - -def test_with_columns_str(df): - df = df.with_columns( - "a + b as c", - "a + b as d", - [ - "a + b as e", - "a + b as f", - ], - g="a + b", - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.schema.field(0).name == "a" - assert result.schema.field(1).name == "b" - assert result.schema.field(2).name == "c" - assert result.schema.field(3).name == "d" - assert result.schema.field(4).name == "e" - assert result.schema.field(5).name == "f" - assert result.schema.field(6).name == "g" - - assert result.column(0) == pa.array([1, 
2, 3]) - assert result.column(1) == pa.array([4, 5, 6]) - assert result.column(2) == pa.array([5, 7, 9]) - assert result.column(3) == pa.array([5, 7, 9]) - assert result.column(4) == pa.array([5, 7, 9]) - assert result.column(5) == pa.array([5, 7, 9]) - assert result.column(6) == pa.array([5, 7, 9]) - - -def test_cast(df): - df = df.cast({"a": pa.float16(), "b": pa.list_(pa.uint32())}) - expected = pa.schema( - [("a", pa.float16()), ("b", pa.list_(pa.uint32())), ("c", pa.int64())] - ) - - assert df.schema() == expected - - -def test_iter_batches(df): - batches = [] - for batch in df: - batches.append(batch) # noqa: PERF402 - - # Delete DataFrame to ensure RecordBatches remain valid - del df - - assert len(batches) == 1 - - batch = batches[0] - assert isinstance(batch, RecordBatch) - pa_batch = batch.to_pyarrow() - assert pa_batch.column(0).to_pylist() == [1, 2, 3] - assert pa_batch.column(1).to_pylist() == [4, 5, 6] - assert pa_batch.column(2).to_pylist() == [8, 5, 8] - - -def test_iter_returns_datafusion_recordbatch(df): - for batch in df: - assert isinstance(batch, RecordBatch) - - -def test_execute_stream_basic(df): - stream = df.execute_stream() - batches = list(stream) - - assert len(batches) == 1 - assert isinstance(batches[0], RecordBatch) - pa_batch = batches[0].to_pyarrow() - assert pa_batch.column(0).to_pylist() == [1, 2, 3] - assert pa_batch.column(1).to_pylist() == [4, 5, 6] - assert pa_batch.column(2).to_pylist() == [8, 5, 8] - - -def test_with_column_renamed(df): - df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum") - - result = df.collect()[0] - - assert result.schema.field(0).name == "a" - assert result.schema.field(1).name == "b" - assert result.schema.field(2).name == "sum" - - -def test_unnest(nested_df): - nested_df = nested_df.unnest_columns("a") - - # execute and collect the first (and only) batch - result = nested_df.collect()[0] - - assert result.column(0) == pa.array([1, 2, 3, 4, 5, 6, None]) - assert 
result.column(1) == pa.array([7, 8, 8, 9, 9, 9, 10]) - - -def test_unnest_without_nulls(nested_df): - nested_df = nested_df.unnest_columns("a", preserve_nulls=False) - - # execute and collect the first (and only) batch - result = nested_df.collect()[0] - - assert result.column(0) == pa.array([1, 2, 3, 4, 5, 6]) - assert result.column(1) == pa.array([7, 8, 8, 9, 9, 9]) - - -def test_join(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]], "l") - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]], "r") - - df2 = df.join(df1, on="a", how="inner") - df2 = df2.sort(column("a")) - table = pa.Table.from_batches(df2.collect()) - - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - assert table.to_pydict() == expected - - # Test the default behavior for dropping duplicate keys - # Since we may have a duplicate column name and pa.Table() - # hides the fact, instead we need to explicitly check the - # resultant arrays. 
- df2 = df.join( - df1, left_on="a", right_on="a", how="inner", coalesce_duplicate_keys=True - ) - df2 = df2.sort(column("a")) - result = df2.collect()[0] - assert result.num_columns == 3 - assert result.column(0) == pa.array([1, 2], pa.int64()) - assert result.column(1) == pa.array([4, 5], pa.int64()) - assert result.column(2) == pa.array([8, 10], pa.int64()) - - df2 = df.join( - df1, left_on="a", right_on="a", how="inner", coalesce_duplicate_keys=False - ) - df2 = df2.sort(column("l.a")) - result = df2.collect()[0] - assert result.num_columns == 4 - assert result.column(0) == pa.array([1, 2], pa.int64()) - assert result.column(1) == pa.array([4, 5], pa.int64()) - assert result.column(2) == pa.array([1, 2], pa.int64()) - assert result.column(3) == pa.array([8, 10], pa.int64()) - - # Verify we don't make a breaking change to pre-43.0.0 - # where users would pass join_keys as a positional argument - df2 = df.join(df1, (["a"], ["a"]), how="inner") - df2 = df2.sort(column("a")) - table = pa.Table.from_batches(df2.collect()) - - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - assert table.to_pydict() == expected - - -def test_join_invalid_params(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]], "l") - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]], "r") - - with pytest.deprecated_call(): - df2 = df.join(df1, join_keys=(["a"], ["a"]), how="inner") - df2.show() - df2 = df2.sort(column("a")) - table = pa.Table.from_batches(df2.collect()) - - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - assert table.to_pydict() == expected - - with pytest.raises( - ValueError, match=r"`left_on` or `right_on` should not provided with `on`" - ): - df2 = df.join(df1, on="a", how="inner", right_on="test") - - with pytest.raises( - ValueError, match=r"`left_on` 
and `right_on` should both be provided." - ): - df2 = df.join(df1, left_on="a", how="inner") - - with pytest.raises( - ValueError, match=r"either `on` or `left_on` and `right_on` should be provided." - ): - df2 = df.join(df1, how="inner") - - -def test_join_on(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]], "l") - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([-8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]], "r") - - df2 = df.join_on(df1, column("l.a").__eq__(column("r.a")), how="inner") - df2.show() - df2 = df2.sort(column("l.a")) - table = pa.Table.from_batches(df2.collect()) - - expected = {"a": [1, 2], "c": [-8, 10], "b": [4, 5]} - assert table.to_pydict() == expected - - df3 = df.join_on( - df1, - column("l.a").__eq__(column("r.a")), - column("l.a").__lt__(column("r.c")), - how="inner", - ) - df3.show() - df3 = df3.sort(column("l.a")) - table = pa.Table.from_batches(df3.collect()) - expected = {"a": [2], "c": [10], "b": [5]} - assert table.to_pydict() == expected - - -def test_join_full_with_drop_duplicate_keys(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 3, 5, 7, 9]), pa.array([True, True, True, True, True])], - names=["log_time", "key_frame"], - ) - key_frame = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([2, 4, 6, 8, 10])], - names=["log_time"], - ) - query_times = ctx.create_dataframe([[batch]]) - - merged = query_times.join( - key_frame, - left_on="log_time", - right_on="log_time", - how="full", - coalesce_duplicate_keys=True, - ) - merged = merged.sort(column("log_time")) - result = merged.collect()[0] - - assert result.num_columns == 2 - assert result.column(0).to_pylist() == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - -def test_join_on_invalid_expr(): - ctx = SessionContext() - - batch = 
pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([4, 5])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]], "l") - df1 = ctx.create_dataframe([[batch]], "r") - - with pytest.raises( - TypeError, match=r"Use col\(\)/column\(\) or lit\(\)/literal\(\)" - ): - df.join_on(df1, "a") - - -def test_aggregate_invalid_aggs(df): - with pytest.raises( - TypeError, match=r"Use col\(\)/column\(\) or lit\(\)/literal\(\)" - ): - df.aggregate([], "a") - - -def test_distinct(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3, 1, 2, 3]), pa.array([4, 5, 6, 4, 5, 6])], - names=["a", "b"], - ) - df_a = ctx.create_dataframe([[batch]]).distinct().sort(column("a")) - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df_b = ctx.create_dataframe([[batch]]).sort(column("a")) - - assert df_a.collect() == df_b.collect() - - -data_test_window_functions = [ - ( - "row", - f.row_number(order_by=[column("b"), column("a").sort(ascending=False)]), - [4, 2, 3, 5, 7, 1, 6], - ), - ( - "row_w_params", - f.row_number( - order_by=[column("b"), column("a")], - partition_by=[column("c")], - ), - [2, 1, 3, 4, 2, 1, 3], - ), - ( - "row_w_params_no_lists", - f.row_number( - order_by=column("b"), - partition_by=column("c"), - ), - [2, 1, 3, 4, 2, 1, 3], - ), - ("rank", f.rank(order_by=[column("b")]), [3, 1, 3, 5, 6, 1, 6]), - ( - "rank_w_params", - f.rank(order_by=[column("b"), column("a")], partition_by=[column("c")]), - [2, 1, 3, 4, 2, 1, 3], - ), - ( - "rank_w_params_no_lists", - f.rank(order_by=column("a"), partition_by=column("c")), - [1, 2, 3, 4, 1, 2, 3], - ), - ( - "dense_rank", - f.dense_rank(order_by=[column("b")]), - [2, 1, 2, 3, 4, 1, 4], - ), - ( - "dense_rank_w_params", - f.dense_rank(order_by=[column("b"), column("a")], partition_by=[column("c")]), - [2, 1, 3, 4, 2, 1, 3], - ), - ( - "dense_rank_w_params_no_lists", - f.dense_rank(order_by=column("a"), 
partition_by=column("c")), - [1, 2, 3, 4, 1, 2, 3], - ), - ( - "percent_rank", - f.round(f.percent_rank(order_by=[column("b")]), literal(3)), - [0.333, 0.0, 0.333, 0.667, 0.833, 0.0, 0.833], - ), - ( - "percent_rank_w_params", - f.round( - f.percent_rank( - order_by=[column("b"), column("a")], partition_by=[column("c")] - ), - literal(3), - ), - [0.333, 0.0, 0.667, 1.0, 0.5, 0.0, 1.0], - ), - ( - "percent_rank_w_params_no_lists", - f.round( - f.percent_rank(order_by=column("a"), partition_by=column("c")), - literal(3), - ), - [0.0, 0.333, 0.667, 1.0, 0.0, 0.5, 1.0], - ), - ( - "cume_dist", - f.round(f.cume_dist(order_by=[column("b")]), literal(3)), - [0.571, 0.286, 0.571, 0.714, 1.0, 0.286, 1.0], - ), - ( - "cume_dist_w_params", - f.round( - f.cume_dist( - order_by=[column("b"), column("a")], partition_by=[column("c")] - ), - literal(3), - ), - [0.5, 0.25, 0.75, 1.0, 0.667, 0.333, 1.0], - ), - ( - "cume_dist_w_params_no_lists", - f.round( - f.cume_dist(order_by=column("a"), partition_by=column("c")), - literal(3), - ), - [0.25, 0.5, 0.75, 1.0, 0.333, 0.667, 1.0], - ), - ( - "ntile", - f.ntile(2, order_by=[column("b")]), - [1, 1, 1, 2, 2, 1, 2], - ), - ( - "ntile_w_params", - f.ntile(2, order_by=[column("b"), column("a")], partition_by=[column("c")]), - [1, 1, 2, 2, 1, 1, 2], - ), - ( - "ntile_w_params_no_lists", - f.ntile(2, order_by=column("b"), partition_by=column("c")), - [1, 1, 2, 2, 1, 1, 2], - ), - ("lead", f.lead(column("b"), order_by=[column("b")]), [7, None, 8, 9, 9, 7, None]), - ( - "lead_w_params", - f.lead( - column("b"), - shift_offset=2, - default_value=-1, - order_by=[column("b"), column("a")], - partition_by=[column("c")], - ), - [8, 7, -1, -1, -1, 9, -1], - ), - ( - "lead_w_params_no_lists", - f.lead( - column("b"), - shift_offset=2, - default_value=-1, - order_by=column("b"), - partition_by=column("c"), - ), - [8, 7, -1, -1, -1, 9, -1], - ), - ("lag", f.lag(column("b"), order_by=[column("b")]), [None, None, 7, 7, 8, None, 9]), - ( - 
"lag_w_params", - f.lag( - column("b"), - shift_offset=2, - default_value=-1, - order_by=[column("b"), column("a")], - partition_by=[column("c")], - ), - [-1, -1, None, 7, -1, -1, None], - ), - ( - "lag_w_params_no_lists", - f.lag( - column("b"), - shift_offset=2, - default_value=-1, - order_by=column("b"), - partition_by=column("c"), - ), - [-1, -1, None, 7, -1, -1, None], - ), - ( - "first_value", - f.first_value(column("a")).over( - Window(partition_by=[column("c")], order_by=[column("b")]) - ), - [1, 1, 1, 1, 5, 5, 5], - ), - ( - "first_value_without_list_args", - f.first_value(column("a")).over( - Window(partition_by=column("c"), order_by=column("b")) - ), - [1, 1, 1, 1, 5, 5, 5], - ), - ( - "first_value_order_by_string", - f.first_value(column("a")).over( - Window(partition_by=[column("c")], order_by="b") - ), - [1, 1, 1, 1, 5, 5, 5], - ), - ( - "last_value", - f.last_value(column("a")).over( - Window( - partition_by=[column("c")], - order_by=[column("b")], - window_frame=WindowFrame("rows", None, None), - ) - ), - [3, 3, 3, 3, 6, 6, 6], - ), - ( - "3rd_value", - f.nth_value(column("b"), 3).over(Window(order_by=[column("a")])), - [None, None, 7, 7, 7, 7, 7], - ), - ( - "avg", - f.round(f.avg(column("b")).over(Window(order_by=[column("a")])), literal(3)), - [7.0, 7.0, 7.0, 7.333, 7.75, 7.75, 8.0], - ), -] - - -@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions) -def test_window_functions(partitioned_df, name, expr, result): - df = partitioned_df.select( - column("a"), column("b"), column("c"), f.alias(expr, name) - ) - df.sort(column("a")).show() - table = pa.Table.from_batches(df.collect()) - - expected = { - "a": [0, 1, 2, 3, 4, 5, 6], - "b": [7, None, 7, 8, 9, None, 9], - "c": ["A", "A", "A", "A", "B", "B", "B"], - name: result, - } - - assert table.sort_by("a").to_pydict() == expected - - -@pytest.mark.parametrize("partition", ["c", df_col("c")]) -def test_rank_partition_by_accepts_string(partitioned_df, partition): - 
"""Passing a string to partition_by should match using col().""" - df = partitioned_df.select( - f.rank(order_by=column("a"), partition_by=partition).alias("r") - ) - table = pa.Table.from_batches(df.sort(column("a")).collect()) - assert table.column("r").to_pylist() == [1, 2, 3, 4, 1, 2, 3] - - -@pytest.mark.parametrize("partition", ["c", df_col("c")]) -def test_window_partition_by_accepts_string(partitioned_df, partition): - """Window.partition_by accepts string identifiers.""" - expr = f.first_value(column("a")).over( - Window(partition_by=partition, order_by=column("b")) - ) - df = partitioned_df.select(expr.alias("fv")) - table = pa.Table.from_batches(df.sort(column("a")).collect()) - assert table.column("fv").to_pylist() == [1, 1, 1, 1, 5, 5, 5] - - -@pytest.mark.parametrize( - ("units", "start_bound", "end_bound"), - [ - (units, start_bound, end_bound) - for units in ("rows", "range") - for start_bound in (None, 0, 1) - for end_bound in (None, 0, 1) - ] - + [ - ("groups", 0, 0), - ], -) -def test_valid_window_frame(units, start_bound, end_bound): - WindowFrame(units, start_bound, end_bound) - - -@pytest.mark.parametrize( - ("units", "start_bound", "end_bound"), - [ - ("invalid-units", 0, None), - ("invalid-units", None, 0), - ("invalid-units", None, None), - ("groups", None, 0), - ("groups", 0, None), - ("groups", None, None), - ], -) -def test_invalid_window_frame(units, start_bound, end_bound): - with pytest.raises(NotImplementedError, match=f"(?i){units}"): - WindowFrame(units, start_bound, end_bound) - - -def test_window_frame_defaults_match_postgres(partitioned_df): - col_a = column("a") - - # When order is not set, the default frame should be unbounded preceding to - # unbounded following. When order is set, the default frame is unbounded preceding - # to current row. 
- no_order = f.avg(col_a).over(Window()).alias("over_no_order") - with_order = f.avg(col_a).over(Window(order_by=[col_a])).alias("over_with_order") - df = partitioned_df.select(col_a, no_order, with_order) - - expected = { - "a": [0, 1, 2, 3, 4, 5, 6], - "over_no_order": [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0], - "over_with_order": [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0], - } - - assert df.sort(col_a).to_pydict() == expected - - -def _build_last_value_df(df): - return df.select( - f.last_value(column("a")) - .over( - Window( - partition_by=[column("c")], - order_by=[column("b")], - window_frame=WindowFrame("rows", None, None), - ) - ) - .alias("expr"), - f.last_value(column("a")) - .over( - Window( - partition_by=[column("c")], - order_by="b", - window_frame=WindowFrame("rows", None, None), - ) - ) - .alias("str"), - ) - - -def _build_nth_value_df(df): - return df.select( - f.nth_value(column("b"), 3).over(Window(order_by=[column("a")])).alias("expr"), - f.nth_value(column("b"), 3).over(Window(order_by="a")).alias("str"), - ) - - -def _build_rank_df(df): - return df.select( - f.rank(order_by=[column("b")]).alias("expr"), - f.rank(order_by="b").alias("str"), - ) - - -def _build_array_agg_df(df): - return df.aggregate( - [column("c")], - [ - f.array_agg(column("a"), order_by=[column("a")]).alias("expr"), - f.array_agg(column("a"), order_by="a").alias("str"), - ], - ).sort(column("c")) - - -@pytest.mark.parametrize( - ("builder", "expected"), - [ - pytest.param(_build_last_value_df, [3, 3, 3, 3, 6, 6, 6], id="last_value"), - pytest.param(_build_nth_value_df, [None, None, 7, 7, 7, 7, 7], id="nth_value"), - pytest.param(_build_rank_df, [1, 1, 3, 3, 5, 6, 6], id="rank"), - pytest.param(_build_array_agg_df, [[0, 1, 2, 3], [4, 5, 6]], id="array_agg"), - ], -) -def test_order_by_string_equivalence(partitioned_df, builder, expected): - df = builder(partitioned_df) - table = pa.Table.from_batches(df.collect()) - assert table.column("expr").to_pylist() == expected - assert 
table.column("expr").to_pylist() == table.column("str").to_pylist() - - -def test_html_formatter_cell_dimension(df, clean_formatter_state): - """Test configuring the HTML formatter with different options.""" - # Configure with custom settings - configure_formatter( - max_width=500, - max_height=200, - enable_cell_expansion=False, - ) - - html_output = df._repr_html_() - - # Verify our configuration was applied - assert "max-height: 200px" in html_output - assert "max-width: 500px" in html_output - # With cell expansion disabled, we shouldn't see expandable-container elements - assert "expandable-container" not in html_output - - -def test_html_formatter_custom_style_provider(df, clean_formatter_state): - """Test using custom style providers with the HTML formatter.""" - - # Configure with custom style provider - configure_formatter(style_provider=CustomStyleProvider()) - - html_output = df._repr_html_() - - # Verify our custom styles were applied - assert "background-color: #4285f4" in html_output - assert "color: white" in html_output - assert "background-color: #f5f5f5" in html_output - - -def test_html_formatter_type_formatters(df, clean_formatter_state): - """Test registering custom type formatters for specific data types.""" - - # Get current formatter and register custom formatters - formatter = get_formatter() - - # Format integers with color based on value - # Using int as the type for the formatter will work since we convert - # Arrow scalar values to Python native types in _get_cell_value - def format_int(value): - return f' 2 else "blue"}">{value}' - - formatter.register_formatter(int, format_int) - - html_output = df._repr_html_() - - # Our test dataframe has values 1,2,3 so we should see: - assert '1' in html_output - - -def test_html_formatter_custom_cell_builder(df, clean_formatter_state): - """Test using a custom cell builder function.""" - - # Create a custom cell builder with distinct styling for different value ranges - def 
custom_cell_builder(value, row, col, table_id): - try: - num_value = int(value) - if num_value > 5: # Values > 5 get green background with indicator - return ( - '' - ) - if num_value < 3: # Values < 3 get blue background with indicator - return ( - '' - ) - except (ValueError, TypeError): - pass - - # Default styling for other cells (3, 4, 5) - return f'' - - # Set our custom cell builder - formatter = get_formatter() - formatter.set_custom_cell_builder(custom_cell_builder) - - html_output = df._repr_html_() - - # Extract cells with specific styling using regex - low_cells = re.findall( - r'', html_output - ) - mid_cells = re.findall( - r'', html_output - ) - high_cells = re.findall( - r'', html_output - ) - - # Sort the extracted values for consistent comparison - low_cells = sorted(map(int, low_cells)) - mid_cells = sorted(map(int, mid_cells)) - high_cells = sorted(map(int, high_cells)) - - # Verify specific values have the correct styling applied - assert low_cells == [1, 2] # Values < 3 - assert mid_cells == [3, 4, 5, 5] # Values 3-5 - assert high_cells == [6, 8, 8] # Values > 5 - - # Verify the exact content with styling appears in the output - assert ( - '' - in html_output - ) - assert ( - '' - in html_output - ) - assert ( - '' in html_output - ) - assert ( - '' in html_output - ) - assert ( - '' - in html_output - ) - assert ( - '' - in html_output - ) - - # Count occurrences to ensure all cells are properly styled - assert html_output.count("-low") == 2 # Two low values (1, 2) - assert html_output.count("-mid") == 4 # Four mid values (3, 4, 5, 5) - assert html_output.count("-high") == 3 # Three high values (6, 8, 8) - - # Create a custom cell builder that changes background color based on value - def custom_cell_builder(value, row, col, table_id): - # Handle numeric values regardless of their exact type - try: - num_value = int(value) - if num_value > 5: # Values > 5 get green background - return f'' - if num_value < 3: # Values < 3 get light blue 
background - return f'' - except (ValueError, TypeError): - pass - - # Default styling for other cells - return f'' - - # Set our custom cell builder - formatter = get_formatter() - formatter.set_custom_cell_builder(custom_cell_builder) - - html_output = df._repr_html_() - - # Verify our custom cell styling was applied - assert "background-color: #d3e9f0" in html_output # For values 1,2 - - -def test_html_formatter_custom_header_builder(df, clean_formatter_state): - """Test using a custom header builder function.""" - - # Create a custom header builder with tooltips - def custom_header_builder(field): - tooltips = { - "a": "Primary key column", - "b": "Secondary values", - "c": "Additional data", - } - tooltip = tooltips.get(field.name, "") - return ( - f'' - ) - - # Set our custom header builder - formatter = get_formatter() - formatter.set_custom_header_builder(custom_header_builder) - - html_output = df._repr_html_() - - # Verify our custom headers were applied - assert 'title="Primary key column"' in html_output - assert 'title="Secondary values"' in html_output - assert "background-color: #333; color: white" in html_output - - -def test_html_formatter_complex_customization(df, clean_formatter_state): - """Test combining multiple customization options together.""" - - # Create a dark mode style provider - class DarkModeStyleProvider: - def get_cell_style(self) -> str: - return ( - "background-color: #222; color: #eee; " - "padding: 8px; border: 1px solid #444;" - ) - - def get_header_style(self) -> str: - return ( - "background-color: #111; color: #fff; padding: 10px; " - "border: 1px solid #333;" - ) - - # Configure with dark mode style - configure_formatter( - max_cell_length=10, - style_provider=DarkModeStyleProvider(), - custom_css=""" - .datafusion-table { - font-family: monospace; - border-collapse: collapse; - } - .datafusion-table tr:hover td { - background-color: #444 !important; - } - """, - ) - - # Add type formatters for special formatting - now 
working with native int values - formatter = get_formatter() - formatter.register_formatter( - int, - lambda n: f'{n}', - ) - - html_output = df._repr_html_() - - # Verify our customizations were applied - assert "background-color: #222" in html_output - assert "background-color: #111" in html_output - assert ".datafusion-table" in html_output - assert "color: #5af" in html_output # Even numbers - - -def test_html_formatter_memory(df, clean_formatter_state): - """Test the memory and row control parameters in DataFrameHtmlFormatter.""" - configure_formatter(max_memory_bytes=10, min_rows=1) - html_output = df._repr_html_() - - # Count the number of table rows in the output - tr_count = count_table_rows(html_output) - # With a tiny memory limit of 10 bytes, the formatter should display - # the minimum number of rows (1) plus a message about truncation - assert tr_count == 2 # 1 for header row, 1 for data row - assert "data truncated" in html_output.lower() - - configure_formatter(max_memory_bytes=10 * MB, min_rows=1) - html_output = df._repr_html_() - # With larger memory limit and min_rows=2, should display all rows - tr_count = count_table_rows(html_output) - # Table should have header row (1) + 3 data rows = 4 rows - assert tr_count == 4 - # No truncation message should appear - assert "data truncated" not in html_output.lower() - - -def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_state): - """Test memory limit behavior at boundary conditions with large dataset. - - This test validates that the formatter correctly handles edge cases when - the memory limit is reached with a large dataset (100,000 rows), ensuring - that min_rows constraint is properly respected while respecting memory limits. - Uses large_df to actually test memory limit behavior with realistic data sizes. 
- """ - - # Get the raw size of the data to test boundary conditions - # First, capture output with no limits - # NOTE: max_rows=200000 is set well above the dataset size (100k rows) to ensure - # we're testing memory limits, not row limits. Default max_rows=10 would - # truncate before memory limit is reached. - configure_formatter(max_memory_bytes=10 * MB, min_rows=1, max_rows=200000) - unrestricted_output = large_df._repr_html_() - unrestricted_rows = count_table_rows(unrestricted_output) - - # Test 1: Very small memory limit should still respect min_rows - # With large dataset, this should definitely hit memory limit before min_rows - configure_formatter(max_memory_bytes=10, min_rows=1) - html_output = large_df._repr_html_() - tr_count = count_table_rows(html_output) - assert tr_count >= 2 # At least header + 1 data row (minimum) - # Should show truncation since we limited memory so aggressively - assert "data truncated" in html_output.lower() - - # Test 2: Memory limit at default size (2MB) should truncate the large dataset - # Default max_rows would truncate at 10 rows, so we don't set it here to test - # that memory limit is respected even with default row limit - configure_formatter(max_memory_bytes=2 * MB, min_rows=1) - html_output = large_df._repr_html_() - tr_count = count_table_rows(html_output) - assert tr_count >= 2 # At least header + min_rows - # Should be truncated since full dataset is much larger than 2MB - assert tr_count < unrestricted_rows - - # Test 3: Very large memory limit should show much more data - # NOTE: max_rows=200000 is critical here - without it, default max_rows=10 - # would limit output to 10 rows even though we have 100MB of memory available - configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000) - html_output = large_df._repr_html_() - tr_count = count_table_rows(html_output) - # Should show significantly more rows, possibly all - assert tr_count > 100 # Should show substantially more rows - - # Test 4: 
Min rows should override memory limit - # With tiny memory and larger min_rows, min_rows should win - configure_formatter(max_memory_bytes=10, min_rows=2) - html_output = large_df._repr_html_() - tr_count = count_table_rows(html_output) - assert tr_count >= 3 # At least header + 2 data rows (min_rows) - # Should show truncation message despite min_rows being satisfied - assert "data truncated" in html_output.lower() - - # Test 5: With reasonable memory and min_rows settings - # NOTE: max_rows=200000 ensures we test memory limit behavior, not row limit - configure_formatter(max_memory_bytes=2 * MB, min_rows=10, max_rows=200000) - html_output = large_df._repr_html_() - tr_count = count_table_rows(html_output) - assert tr_count >= 11 # header + at least 10 data rows (min_rows) - # Should be truncated due to memory limit - assert tr_count < unrestricted_rows - - -def test_html_formatter_stream_early_termination( - large_multi_batch_df, clean_formatter_state -): - """Test that memory limits cause early stream termination with multi-batch data. - - This test specifically validates that the formatter stops collecting data when - the memory limit is reached, rather than collecting all data and then truncating. - The large_multi_batch_df fixture creates 10 record batches, allowing us to verify - that not all batches are consumed when memory limit is hit. 
- - Key difference from test_html_formatter_memory_boundary_conditions: - - Uses multi-batch DataFrame to verify stream termination behavior - - Tests with memory limit exceeded by 2-3 batches but not 1 batch - - Verifies partial data + truncation message + respects min_rows - """ - - # Get baseline: how much data fits without memory limit - configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000) - unrestricted_output = large_multi_batch_df._repr_html_() - unrestricted_rows = count_table_rows(unrestricted_output) - - # Test 1: Memory limit exceeded by ~2 batches (each batch ~10k rows) - # With 1 batch (~1-2MB), we should have space. With 2-3 batches, we exceed limit. - # Set limit to ~3MB to ensure we collect ~1 batch before hitting limit - configure_formatter(max_memory_bytes=3 * MB, min_rows=1, max_rows=200000) - html_output = large_multi_batch_df._repr_html_() - tr_count = count_table_rows(html_output) - - # Should show significant truncation (not all 100k rows) - assert tr_count < unrestricted_rows, "Should be truncated by memory limit" - assert tr_count >= 2, "Should respect min_rows" - assert "data truncated" in html_output.lower(), "Should indicate truncation" - - # Test 2: Very tight memory limit should still respect min_rows - # Even with tiny memory (10 bytes), should show at least min_rows - configure_formatter(max_memory_bytes=10, min_rows=5, max_rows=200000) - html_output = large_multi_batch_df._repr_html_() - tr_count = count_table_rows(html_output) - - assert tr_count >= 6, "Should show header + at least min_rows (5)" - assert "data truncated" in html_output.lower(), "Should indicate truncation" - - # Test 3: Memory limit should take precedence over max_rows in early termination - # With max_rows=100 but small memory limit, should terminate early due to memory - configure_formatter(max_memory_bytes=2 * MB, min_rows=1, max_rows=100) - html_output = large_multi_batch_df._repr_html_() - tr_count = count_table_rows(html_output) - - # 
Should be truncated by memory limit (showing more than max_rows would suggest - # but less than unrestricted) - assert tr_count >= 2, "Should respect min_rows" - assert tr_count < unrestricted_rows, "Should be truncated" - # Output should indicate why truncation occurred - assert "data truncated" in html_output.lower() - - -def test_html_formatter_max_rows(df, clean_formatter_state): - configure_formatter(min_rows=2, max_rows=2) - html_output = df._repr_html_() - - tr_count = count_table_rows(html_output) - # Table should have header row (1) + 2 data rows = 3 rows - assert tr_count == 3 - - configure_formatter(min_rows=2, max_rows=3) - html_output = df._repr_html_() - - tr_count = count_table_rows(html_output) - # Table should have header row (1) + 3 data rows = 4 rows - assert tr_count == 4 - - -def test_html_formatter_validation(): - # Test validation for invalid parameters - - with pytest.raises(ValueError, match="max_cell_length must be a positive integer"): - DataFrameHtmlFormatter(max_cell_length=0) - - with pytest.raises(ValueError, match="max_width must be a positive integer"): - DataFrameHtmlFormatter(max_width=0) - - with pytest.raises(ValueError, match="max_height must be a positive integer"): - DataFrameHtmlFormatter(max_height=0) - - with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): - DataFrameHtmlFormatter(max_memory_bytes=0) - - with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): - DataFrameHtmlFormatter(max_memory_bytes=-100) - - with pytest.raises(ValueError, match="min_rows must be a positive integer"): - DataFrameHtmlFormatter(min_rows=0) - - with pytest.raises(ValueError, match="min_rows must be a positive integer"): - DataFrameHtmlFormatter(min_rows=-5) - - with pytest.raises(ValueError, match="max_rows must be a positive integer"): - DataFrameHtmlFormatter(max_rows=0) - - with pytest.raises(ValueError, match="max_rows must be a positive integer"): - 
DataFrameHtmlFormatter(max_rows=-10) - - with pytest.raises( - ValueError, match="min_rows must be less than or equal to max_rows" - ): - DataFrameHtmlFormatter(min_rows=5, max_rows=4) - - -def test_repr_rows_backward_compatibility(clean_formatter_state): - """Test that repr_rows parameter still works as deprecated alias.""" - # Should work when not conflicting with max_rows - with pytest.warns(DeprecationWarning, match="repr_rows parameter is deprecated"): - formatter = DataFrameHtmlFormatter(repr_rows=15, min_rows=10) - assert formatter.max_rows == 15 - assert formatter.repr_rows == 15 - - # Should fail when conflicting with max_rows - with pytest.raises(ValueError, match="Cannot specify both repr_rows and max_rows"): - DataFrameHtmlFormatter(repr_rows=5, max_rows=10) - - # Setting repr_rows via property should warn - formatter2 = DataFrameHtmlFormatter() - with pytest.warns(DeprecationWarning, match="repr_rows is deprecated"): - formatter2.repr_rows = 7 - assert formatter2.max_rows == 7 - assert formatter2.repr_rows == 7 - - -def test_configure_formatter(df, clean_formatter_state): - """Test using custom style providers with the HTML formatter and configured - parameters.""" - - # these are non-default values - max_cell_length = 10 - max_width = 500 - max_height = 30 - max_memory_bytes = 3 * MB - min_rows = 2 - max_rows = 2 - enable_cell_expansion = False - show_truncation_message = False - use_shared_styles = False - - reset_formatter() - formatter_default = get_formatter() - - assert formatter_default.max_cell_length != max_cell_length - assert formatter_default.max_width != max_width - assert formatter_default.max_height != max_height - assert formatter_default.max_memory_bytes != max_memory_bytes - assert formatter_default.min_rows != min_rows - assert formatter_default.max_rows != max_rows - assert formatter_default.enable_cell_expansion != enable_cell_expansion - assert formatter_default.show_truncation_message != show_truncation_message - assert 
formatter_default.use_shared_styles != use_shared_styles - - # Configure with custom style provider and additional parameters - configure_formatter( - max_cell_length=max_cell_length, - max_width=max_width, - max_height=max_height, - max_memory_bytes=max_memory_bytes, - min_rows=min_rows, - max_rows=max_rows, - enable_cell_expansion=enable_cell_expansion, - show_truncation_message=show_truncation_message, - use_shared_styles=use_shared_styles, - ) - formatter_custom = get_formatter() - assert formatter_custom.max_cell_length == max_cell_length - assert formatter_custom.max_width == max_width - assert formatter_custom.max_height == max_height - assert formatter_custom.max_memory_bytes == max_memory_bytes - assert formatter_custom.min_rows == min_rows - assert formatter_custom.max_rows == max_rows - assert formatter_custom.enable_cell_expansion == enable_cell_expansion - assert formatter_custom.show_truncation_message == show_truncation_message - assert formatter_custom.use_shared_styles == use_shared_styles - - -def test_configure_formatter_invalid_params(clean_formatter_state): - """Test that configure_formatter rejects invalid parameters.""" - with pytest.raises(ValueError, match="Invalid formatter parameters"): - configure_formatter(invalid_param=123) - - # Test with multiple parameters, one valid and one invalid - with pytest.raises(ValueError, match="Invalid formatter parameters"): - configure_formatter(max_width=500, not_a_real_param="test") - - # Test with multiple invalid parameters - with pytest.raises(ValueError, match="Invalid formatter parameters"): - configure_formatter(fake_param1="test", fake_param2=456) - - -def test_get_dataframe(tmp_path): - ctx = SessionContext() - - path = tmp_path / "test.csv" - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - write_csv(table, path) - - ctx.register_csv("csv", path) - - df = ctx.table("csv") - assert isinstance(df, 
DataFrame) - - -def test_struct_select(struct_df): - df = struct_df.select( - column("a")["c"] + column("b"), - column("a")["c"] - column("b"), - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([5, 7, 9]) - assert result.column(1) == pa.array([-3, -3, -3]) - - -def test_explain(df): - df = df.select( - column("a") + column("b"), - column("a") - column("b"), - ) - df.explain() - - -def test_logical_plan(aggregate_df): - plan = aggregate_df.logical_plan() - - expected = "Projection: test.c1, sum(test.c2)" - - assert expected == plan.display() - - expected = ( - "Projection: test.c1, sum(test.c2)\n" - " Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]\n" - " TableScan: test" - ) - - assert expected == plan.display_indent() - - -def test_optimized_logical_plan(aggregate_df): - plan = aggregate_df.optimized_logical_plan() - - expected = "Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]" - - assert expected == plan.display() - - expected = ( - "Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]\n" - " TableScan: test projection=[c1, c2]" - ) - - assert expected == plan.display_indent() - - -def test_execution_plan(aggregate_df): - plan = aggregate_df.execution_plan() - - expected = ( - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" - ) - - assert expected == plan.display() - - # Check the number of partitions is as expected. 
- assert isinstance(plan.partition_count, int) - - expected = ( - "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" - " TableScan: test projection=[c1, c2]" - ) - - indent = plan.display_indent() - - # indent plan will be different for everyone due to absolute path - # to filename, so we just check for some expected content - assert "AggregateExec:" in indent - assert "RepartitionExec:" in indent - assert "DataSourceExec:" in indent - assert "file_type=csv" in indent - - ctx = SessionContext() - rows_returned = 0 - for idx in range(plan.partition_count): - stream = ctx.execute(plan, idx) - try: - batch = stream.next() - assert batch is not None - rows_returned += len(batch.to_pyarrow()[0]) - except StopIteration: - # This is one of the partitions with no values - pass - with pytest.raises(StopIteration): - stream.next() - - assert rows_returned == 5 - - -@pytest.mark.asyncio -async def test_async_iteration_of_df(aggregate_df): - rows_returned = 0 - async for batch in aggregate_df: - assert batch is not None - rows_returned += len(batch.to_pyarrow()[0]) - - assert rows_returned == 5 - - -def test_repartition(df): - df.repartition(2) - - -def test_repartition_by_hash(df): - df.repartition_by_hash(column("a"), num=2) - - -def test_repartition_by_hash_sql_expression(df): - df.repartition_by_hash("a", num=2) - - -def test_repartition_by_hash_mix(df): - df.repartition_by_hash(column("a"), "b", num=2) - - -def test_intersect(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df_a = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([3, 4, 5]), pa.array([6, 7, 8])], - names=["a", "b"], - ) - df_b = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([3]), pa.array([6])], - names=["a", "b"], - ) - df_c = 
ctx.create_dataframe([[batch]]).sort(column("a")) - - df_a_i_b = df_a.intersect(df_b).sort(column("a")) - - assert df_c.collect() == df_a_i_b.collect() - - -def test_except_all(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df_a = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([3, 4, 5]), pa.array([6, 7, 8])], - names=["a", "b"], - ) - df_b = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([4, 5])], - names=["a", "b"], - ) - df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - - df_a_e_b = df_a.except_all(df_b).sort(column("a")) - - assert df_c.collect() == df_a_e_b.collect() - - -def test_collect_partitioned(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - assert [[batch]] == ctx.create_dataframe([[batch]]).collect_partitioned() - - -def test_collect_column(ctx: SessionContext): - batch_1 = pa.RecordBatch.from_pydict({"a": [1, 2, 3]}) - batch_2 = pa.RecordBatch.from_pydict({"a": [4, 5, 6]}) - batch_3 = pa.RecordBatch.from_pydict({"a": [7, 8, 9]}) - - ctx.register_record_batches("t", [[batch_1, batch_2], [batch_3]]) - - result = ctx.table("t").sort(column("a")).collect_column("a") - expected = pa.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) - assert result == expected - - -def test_union(ctx): - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df_a = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([3, 4, 5]), pa.array([6, 7, 8])], - names=["a", "b"], - ) - df_b = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3, 3, 4, 5]), pa.array([4, 5, 6, 6, 7, 8])], - names=["a", "b"], - ) - df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - - df_a_u_b = 
df_a.union(df_b).sort(column("a")) - - assert df_c.collect() == df_a_u_b.collect() - - -def test_union_distinct(ctx): - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df_a = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([3, 4, 5]), pa.array([6, 7, 8])], - names=["a", "b"], - ) - df_b = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3, 4, 5]), pa.array([4, 5, 6, 7, 8])], - names=["a", "b"], - ) - df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - - df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a")) - - assert df_c.collect() == df_a_u_b.collect() - assert df_c.collect() == df_a_u_b.collect() - - -def test_cache(df): - assert df.cache().collect() == df.collect() - - -def test_count(df): - # Get number of rows - assert df.count() == 3 - - -def test_to_pandas(df): - # Skip test if pandas is not installed - pd = pytest.importorskip("pandas") - - # Convert datafusion dataframe to pandas dataframe - pandas_df = df.to_pandas() - assert isinstance(pandas_df, pd.DataFrame) - assert pandas_df.shape == (3, 3) - assert set(pandas_df.columns) == {"a", "b", "c"} - - -def test_empty_to_pandas(df): - # Skip test if pandas is not installed - pd = pytest.importorskip("pandas") - - # Convert empty datafusion dataframe to pandas dataframe - pandas_df = df.limit(0).to_pandas() - assert isinstance(pandas_df, pd.DataFrame) - assert pandas_df.shape == (0, 3) - assert set(pandas_df.columns) == {"a", "b", "c"} - - -def test_to_polars(df): - # Skip test if polars is not installed - pl = pytest.importorskip("polars") - - # Convert datafusion dataframe to polars dataframe - polars_df = df.to_polars() - assert isinstance(polars_df, pl.DataFrame) - assert polars_df.shape == (3, 3) - assert set(polars_df.columns) == {"a", "b", "c"} - - -def test_empty_to_polars(df): - # Skip test if polars is not installed - pl = 
pytest.importorskip("polars") - - # Convert empty datafusion dataframe to polars dataframe - polars_df = df.limit(0).to_polars() - assert isinstance(polars_df, pl.DataFrame) - assert polars_df.shape == (0, 3) - assert set(polars_df.columns) == {"a", "b", "c"} - - -def test_to_arrow_table(df): - # Convert datafusion dataframe to pyarrow Table - pyarrow_table = df.to_arrow_table() - assert isinstance(pyarrow_table, pa.Table) - assert pyarrow_table.shape == (3, 3) - assert set(pyarrow_table.column_names) == {"a", "b", "c"} - - -def test_parquet_non_null_column_to_pyarrow(ctx, tmp_path): - path = tmp_path.joinpath("t.parquet") - - ctx.sql("create table t_(a int not null)").collect() - ctx.sql("insert into t_ values (1), (2), (3)").collect() - ctx.sql(f"copy (select * from t_) to '{path}'").collect() - - ctx.register_parquet("t", path) - pyarrow_table = ctx.sql("select max(a) as m from t").to_arrow_table() - assert pyarrow_table.to_pydict() == {"m": [3]} - - -def test_parquet_empty_batch_to_pyarrow(ctx, tmp_path): - path = tmp_path.joinpath("t.parquet") - - ctx.sql("create table t_(a int not null)").collect() - ctx.sql("insert into t_ values (1), (2), (3)").collect() - ctx.sql(f"copy (select * from t_) to '{path}'").collect() - - ctx.register_parquet("t", path) - pyarrow_table = ctx.sql("select * from t limit 0").to_arrow_table() - assert pyarrow_table.schema == pa.schema( - [ - pa.field("a", pa.int32(), nullable=False), - ] - ) - - -def test_parquet_null_aggregation_to_pyarrow(ctx, tmp_path): - path = tmp_path.joinpath("t.parquet") - - ctx.sql("create table t_(a int not null)").collect() - ctx.sql("insert into t_ values (1), (2), (3)").collect() - ctx.sql(f"copy (select * from t_) to '{path}'").collect() - - ctx.register_parquet("t", path) - pyarrow_table = ctx.sql( - "select max(a) as m from (select * from t where a < 0)" - ).to_arrow_table() - assert pyarrow_table.to_pydict() == {"m": [None]} - assert pyarrow_table.schema == pa.schema( - [ - pa.field("m", pa.int32(), 
nullable=True), - ] - ) - - -def test_execute_stream(df): - stream = df.execute_stream() - assert all(batch is not None for batch in stream) - assert not list(stream) # after one iteration the generator must be exhausted - - -@pytest.mark.asyncio -async def test_execute_stream_async(df): - stream = df.execute_stream() - batches = [batch async for batch in stream] - - assert all(batch is not None for batch in batches) - - # After consuming all batches, the stream should be exhausted - remaining_batches = [batch async for batch in stream] - assert not remaining_batches - - -@pytest.mark.parametrize("schema", [True, False]) -def test_execute_stream_to_arrow_table(df, schema): - stream = df.execute_stream() - - if schema: - pyarrow_table = pa.Table.from_batches( - (batch.to_pyarrow() for batch in stream), schema=df.schema() - ) - else: - pyarrow_table = pa.Table.from_batches(batch.to_pyarrow() for batch in stream) - - assert isinstance(pyarrow_table, pa.Table) - assert pyarrow_table.shape == (3, 3) - assert set(pyarrow_table.column_names) == {"a", "b", "c"} - - -@pytest.mark.asyncio -@pytest.mark.parametrize("schema", [True, False]) -async def test_execute_stream_to_arrow_table_async(df, schema): - stream = df.execute_stream() - - if schema: - pyarrow_table = pa.Table.from_batches( - [batch.to_pyarrow() async for batch in stream], schema=df.schema() - ) - else: - pyarrow_table = pa.Table.from_batches( - [batch.to_pyarrow() async for batch in stream] - ) - - assert isinstance(pyarrow_table, pa.Table) - assert pyarrow_table.shape == (3, 3) - assert set(pyarrow_table.column_names) == {"a", "b", "c"} - - -def test_execute_stream_partitioned(df): - streams = df.execute_stream_partitioned() - assert all(batch is not None for stream in streams for batch in stream) - assert all( - not list(stream) for stream in streams - ) # after one iteration all generators must be exhausted - - -@pytest.mark.asyncio -async def test_execute_stream_partitioned_async(df): - streams = 
df.execute_stream_partitioned() - - for stream in streams: - batches = [batch async for batch in stream] - assert all(batch is not None for batch in batches) - - # Ensure the stream is exhausted after iteration - remaining_batches = [batch async for batch in stream] - assert not remaining_batches - - -def test_empty_to_arrow_table(df): - # Convert empty datafusion dataframe to pyarrow Table - pyarrow_table = df.limit(0).to_arrow_table() - assert isinstance(pyarrow_table, pa.Table) - assert pyarrow_table.shape == (0, 3) - assert set(pyarrow_table.column_names) == {"a", "b", "c"} - - -def test_iter_batches_dataframe(fail_collect): - ctx = SessionContext() - - batch1 = pa.record_batch([pa.array([1])], names=["a"]) - batch2 = pa.record_batch([pa.array([2])], names=["a"]) - df = ctx.create_dataframe([[batch1], [batch2]]) - - expected = [batch1, batch2] - results = [b.to_pyarrow() for b in df] - - assert len(results) == len(expected) - for exp in expected: - assert any(got.equals(exp) for got in results) - - -def test_arrow_c_stream_to_table_and_reader(fail_collect): - ctx = SessionContext() - - # Create a DataFrame with two separate record batches - batch1 = pa.record_batch([pa.array([1])], names=["a"]) - batch2 = pa.record_batch([pa.array([2])], names=["a"]) - df = ctx.create_dataframe([[batch1], [batch2]]) - - table = pa.Table.from_batches(batch.to_pyarrow() for batch in df) - batches = table.to_batches() - - assert len(batches) == 2 - expected = [batch1, batch2] - for exp in expected: - assert any(got.equals(exp) for got in batches) - assert table.schema == df.schema() - assert table.column("a").num_chunks == 2 - - reader = pa.RecordBatchReader.from_stream(df) - assert isinstance(reader, pa.RecordBatchReader) - reader_table = pa.Table.from_batches(reader) - expected = pa.Table.from_batches([batch1, batch2]) - assert reader_table.equals(expected) - - -def test_arrow_c_stream_order(): - ctx = SessionContext() - - batch1 = pa.record_batch([pa.array([1])], names=["a"]) - 
batch2 = pa.record_batch([pa.array([2])], names=["a"]) - - df = ctx.create_dataframe([[batch1, batch2]]) - - table = pa.Table.from_batches(batch.to_pyarrow() for batch in df) - expected = pa.Table.from_batches([batch1, batch2]) - - assert table.equals(expected) - col = table.column("a") - assert col.chunk(0)[0].as_py() == 1 - assert col.chunk(1)[0].as_py() == 2 - - -def test_arrow_c_stream_schema_selection(fail_collect): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [ - pa.array([1, 2]), - pa.array([3, 4]), - pa.array([5, 6]), - ], - names=["a", "b", "c"], - ) - df = ctx.create_dataframe([[batch]]) - - requested_schema = pa.schema([("c", pa.int64()), ("a", pa.int64())]) - - c_schema = pa_cffi.ffi.new("struct ArrowSchema*") - address = int(pa_cffi.ffi.cast("uintptr_t", c_schema)) - requested_schema._export_to_c(address) - capsule_new = ctypes.pythonapi.PyCapsule_New - capsule_new.restype = ctypes.py_object - capsule_new.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p] - - reader = pa.RecordBatchReader.from_stream(df, schema=requested_schema) - - assert reader.schema == requested_schema - - batches = list(reader) - - assert len(batches) == 1 - expected_batch = pa.record_batch( - [pa.array([5, 6]), pa.array([1, 2])], names=["c", "a"] - ) - assert batches[0].equals(expected_batch) - - -def test_arrow_c_stream_schema_mismatch(fail_collect): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([3, 4])], names=["a", "b"] - ) - df = ctx.create_dataframe([[batch]]) - - bad_schema = pa.schema([("a", pa.string())]) - - c_schema = pa_cffi.ffi.new("struct ArrowSchema*") - address = int(pa_cffi.ffi.cast("uintptr_t", c_schema)) - bad_schema._export_to_c(address) - - capsule_new = ctypes.pythonapi.PyCapsule_New - capsule_new.restype = ctypes.py_object - capsule_new.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p] - bad_capsule = capsule_new(ctypes.c_void_p(address), b"arrow_schema", None) - 
- with pytest.raises(Exception, match="Fail to merge schema"): - df.__arrow_c_stream__(bad_capsule) - - -def test_to_pylist(df): - # Convert datafusion dataframe to Python list - pylist = df.to_pylist() - assert isinstance(pylist, list) - assert pylist == [ - {"a": 1, "b": 4, "c": 8}, - {"a": 2, "b": 5, "c": 5}, - {"a": 3, "b": 6, "c": 8}, - ] - - -def test_to_pydict(df): - # Convert datafusion dataframe to Python dictionary - pydict = df.to_pydict() - assert isinstance(pydict, dict) - assert pydict == {"a": [1, 2, 3], "b": [4, 5, 6], "c": [8, 5, 8]} - - -def test_describe(df): - # Calculate statistics - df = df.describe() - - # Collect the result - result = df.to_pydict() - - assert result == { - "describe": [ - "count", - "null_count", - "mean", - "std", - "min", - "max", - "median", - ], - "a": [3.0, 0.0, 2.0, 1.0, 1.0, 3.0, 2.0], - "b": [3.0, 0.0, 5.0, 1.0, 4.0, 6.0, 5.0], - "c": [3.0, 0.0, 7.0, 1.7320508075688772, 5.0, 8.0, 8.0], - } - - -@pytest.mark.parametrize("path_to_str", [True, False]) -def test_write_csv(ctx, df, tmp_path, path_to_str): - path = str(tmp_path) if path_to_str else tmp_path - - df.write_csv(path, with_header=True) - - ctx.register_csv("csv", path) - result = ctx.table("csv").to_pydict() - expected = df.to_pydict() - - assert result == expected - - -def generate_test_write_params() -> list[tuple]: - # Overwrite and Replace are not implemented for many table writers - insert_ops = [InsertOp.APPEND, None] - sort_by_cases = [ - (None, [1, 2, 3], "unsorted"), - (column("c"), [2, 1, 3], "single_column_expr"), - (column("a").sort(ascending=False), [3, 2, 1], "single_sort_expr"), - ([column("c"), column("b")], [2, 1, 3], "list_col_expr"), - ( - [column("c").sort(ascending=False), column("b").sort(ascending=False)], - [3, 1, 2], - "list_sort_expr", - ), - ] - - formats = ["csv", "json", "parquet", "table"] - - return [ - pytest.param( - output_format, - insert_op, - sort_by, - expected_a, - id=f"{output_format}_{test_id}", - ) - for output_format, 
insert_op, ( - sort_by, - expected_a, - test_id, - ) in itertools.product(formats, insert_ops, sort_by_cases) - ] - - -@pytest.mark.parametrize( - ("output_format", "insert_op", "sort_by", "expected_a"), - generate_test_write_params(), -) -def test_write_files_with_options( - ctx, df, tmp_path, output_format, insert_op, sort_by, expected_a -) -> None: - write_options = DataFrameWriteOptions(insert_operation=insert_op, sort_by=sort_by) - - if output_format == "csv": - df.write_csv(tmp_path, with_header=True, write_options=write_options) - ctx.register_csv("test_table", tmp_path) - elif output_format == "json": - df.write_json(tmp_path, write_options=write_options) - ctx.register_json("test_table", tmp_path) - elif output_format == "parquet": - df.write_parquet(tmp_path, write_options=write_options) - ctx.register_parquet("test_table", tmp_path) - elif output_format == "table": - batch = pa.RecordBatch.from_arrays([[], [], []], schema=df.schema()) - ctx.register_record_batches("test_table", [[batch]]) - ctx.table("test_table").show() - df.write_table("test_table", write_options=write_options) - - result = ctx.table("test_table").to_pydict()["a"] - ctx.table("test_table").show() - - assert result == expected_a - - -@pytest.mark.parametrize("path_to_str", [True, False]) -def test_write_json(ctx, df, tmp_path, path_to_str): - path = str(tmp_path) if path_to_str else tmp_path - - df.write_json(path) - - ctx.register_json("json", path) - result = ctx.table("json").to_pydict() - expected = df.to_pydict() - - assert result == expected - - -@pytest.mark.parametrize("path_to_str", [True, False]) -def test_write_parquet(df, tmp_path, path_to_str): - path = str(tmp_path) if path_to_str else tmp_path - - df.write_parquet(str(path)) - result = pq.read_table(str(path)).to_pydict() - expected = df.to_pydict() - - assert result == expected - - -@pytest.mark.parametrize( - ("compression", "compression_level"), - [("gzip", 6), ("brotli", 7), ("zstd", 15)], -) -def 
test_write_compressed_parquet(df, tmp_path, compression, compression_level): - path = tmp_path - - df.write_parquet( - str(path), compression=compression, compression_level=compression_level - ) - - # test that the actual compression scheme is the one written - for _root, _dirs, files in os.walk(path): - for file in files: - if file.endswith(".parquet"): - metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() - for row_group in metadata["row_groups"]: - for columns in row_group["columns"]: - assert columns["compression"].lower() == compression - - result = pq.read_table(str(path)).to_pydict() - expected = df.to_pydict() - - assert result == expected - - -@pytest.mark.parametrize( - ("compression", "compression_level"), - [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], -) -def test_write_compressed_parquet_wrong_compression_level( - df, tmp_path, compression, compression_level -): - path = tmp_path - - with pytest.raises(ValueError): - df.write_parquet( - str(path), - compression=compression, - compression_level=compression_level, - ) - - -@pytest.mark.parametrize("compression", ["wrong"]) -def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression): - path = tmp_path - - with pytest.raises(ValueError): - df.write_parquet(str(path), compression=compression) - - -# not testing lzo because it it not implemented yet -# https://github.com/apache/arrow-rs/issues/6970 -@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"]) -def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression): - # Test write_parquet with zstd, brotli, gzip default compression level, - # ie don't specify compression level - # should complete without error - path = tmp_path - - df.write_parquet(str(path), compression=compression) - - -def test_write_parquet_with_options_default_compression(df, tmp_path): - """Test that the default compression is ZSTD.""" - df.write_parquet(tmp_path) - - for file in 
tmp_path.rglob("*.parquet"): - metadata = pq.ParquetFile(file).metadata.to_dict() - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - assert col["compression"].lower() == "zstd" - - -@pytest.mark.parametrize( - "compression", - ["gzip(6)", "brotli(7)", "zstd(15)", "snappy", "uncompressed"], -) -def test_write_parquet_with_options_compression(df, tmp_path, compression): - import re - - path = tmp_path - df.write_parquet_with_options( - str(path), ParquetWriterOptions(compression=compression) - ) - - # test that the actual compression scheme is the one written - for _root, _dirs, files in os.walk(path): - for file in files: - if file.endswith(".parquet"): - metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - assert col["compression"].lower() == re.sub( - r"\(\d+\)", "", compression - ) - - result = pq.read_table(str(path)).to_pydict() - expected = df.to_pydict() - - assert result == expected - - -@pytest.mark.parametrize( - "compression", - ["gzip(12)", "brotli(15)", "zstd(23)"], -) -def test_write_parquet_with_options_wrong_compression_level(df, tmp_path, compression): - path = tmp_path - - with pytest.raises(Exception, match=r"valid compression range .*? 
exceeded."): - df.write_parquet_with_options( - str(path), ParquetWriterOptions(compression=compression) - ) - - -@pytest.mark.parametrize("compression", ["wrong", "wrong(12)"]) -def test_write_parquet_with_options_invalid_compression(df, tmp_path, compression): - path = tmp_path - - with pytest.raises(Exception, match="Unknown or unsupported parquet compression"): - df.write_parquet_with_options( - str(path), ParquetWriterOptions(compression=compression) - ) - - -@pytest.mark.parametrize( - ("writer_version", "format_version"), - [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")], -) -def test_write_parquet_with_options_writer_version( - df, tmp_path, writer_version, format_version -): - """Test the Parquet writer version. Note that writer_version=2.0 results in - format_version=2.6""" - if writer_version is None: - df.write_parquet_with_options(tmp_path, ParquetWriterOptions()) - else: - df.write_parquet_with_options( - tmp_path, ParquetWriterOptions(writer_version=writer_version) - ) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - assert metadata["format_version"] == format_version - - -@pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"]) -def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_version): - """Test that invalid writer versions in Parquet throw an exception.""" - with pytest.raises(Exception, match="Invalid parquet writer version"): - df.write_parquet_with_options( - tmp_path, ParquetWriterOptions(writer_version=writer_version) - ) - - -@pytest.mark.parametrize("dictionary_enabled", [True, False, None]) -def test_write_parquet_with_options_dictionary_enabled( - df, tmp_path, dictionary_enabled -): - """Test enabling/disabling the dictionaries in Parquet.""" - df.write_parquet_with_options( - tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled) - ) - # by default, the dictionary is enabled, so None results in True - 
result = dictionary_enabled if dictionary_enabled is not None else True - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - assert col["has_dictionary_page"] == result - - -@pytest.mark.parametrize( - ("statistics_enabled", "has_statistics"), - [("page", True), ("chunk", True), ("none", False), (None, True)], -) -def test_write_parquet_with_options_statistics_enabled( - df, tmp_path, statistics_enabled, has_statistics -): - """Test configuring the statistics in Parquet. In pyarrow we can only check for - column-level statistics, so "page" and "chunk" are tested in the same way.""" - df.write_parquet_with_options( - tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled) - ) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - if has_statistics: - assert col["statistics"] is not None - else: - assert col["statistics"] is None - - -@pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000]) -def test_write_parquet_with_options_max_row_group_size( - large_df, tmp_path, max_row_group_size -): - """Test configuring the max number of rows per group in Parquet. 
These test cases - guarantee that the number of rows for each row group is max_row_group_size, given - the total number of rows is a multiple of max_row_group_size.""" - path = f"{tmp_path}/t.parquet" - large_df.write_parquet_with_options( - path, ParquetWriterOptions(max_row_group_size=max_row_group_size) - ) - - parquet = pq.ParquetFile(path) - metadata = parquet.metadata.to_dict() - for row_group in metadata["row_groups"]: - assert row_group["num_rows"] == max_row_group_size - - -@pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"]) -def test_write_parquet_with_options_created_by(df, tmp_path, created_by): - """Test configuring the created by metadata in Parquet.""" - df.write_parquet_with_options(tmp_path, ParquetWriterOptions(created_by=created_by)) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - assert metadata["created_by"] == created_by - - -@pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50]) -def test_write_parquet_with_options_statistics_truncate_length( - df, tmp_path, statistics_truncate_length -): - """Test configuring the truncate limit in Parquet's row-group-level statistics.""" - ctx = SessionContext() - data = { - "a": [ - "a_the_quick_brown_fox_jumps_over_the_lazy_dog", - "m_the_quick_brown_fox_jumps_over_the_lazy_dog", - "z_the_quick_brown_fox_jumps_over_the_lazy_dog", - ], - "b": ["a_smaller", "m_smaller", "z_smaller"], - } - df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_with_options( - tmp_path, - ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length), - ) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - statistics = col["statistics"] - assert len(statistics["min"]) <= statistics_truncate_length - assert len(statistics["max"]) <= 
statistics_truncate_length - - -def test_write_parquet_with_options_default_encoding(tmp_path): - """Test that, by default, Parquet files are written with dictionary encoding. - Note that dictionary encoding is not used for boolean values, so it is not tested - here.""" - ctx = SessionContext() - data = { - "a": [1, 2, 3], - "b": ["1", "2", "3"], - "c": [1.01, 2.02, 3.03], - } - df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_with_options(tmp_path, ParquetWriterOptions()) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - assert col["encodings"] == ("PLAIN", "RLE", "RLE_DICTIONARY") - - -@pytest.mark.parametrize( - ("encoding", "data_types", "result"), - [ - ("plain", ["int", "float", "str", "bool"], ("PLAIN", "RLE")), - ("rle", ["bool"], ("RLE",)), - ("delta_binary_packed", ["int"], ("RLE", "DELTA_BINARY_PACKED")), - ("delta_length_byte_array", ["str"], ("RLE", "DELTA_LENGTH_BYTE_ARRAY")), - ("delta_byte_array", ["str"], ("RLE", "DELTA_BYTE_ARRAY")), - ("byte_stream_split", ["int", "float"], ("RLE", "BYTE_STREAM_SPLIT")), - ], -) -def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, result): - """Test different encodings in Parquet in their respective support column types.""" - ctx = SessionContext() - - data = {} - for data_type in data_types: - if data_type == "int": - data["int"] = [1, 2, 3] - elif data_type == "float": - data["float"] = [1.01, 2.02, 3.03] - elif data_type == "str": - data["str"] = ["a", "b", "c"] - elif data_type == "bool": - data["bool"] = [True, False, True] - - df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_with_options( - tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False) - ) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in 
metadata["row_groups"]: - for col in row_group["columns"]: - assert col["encodings"] == result - - -@pytest.mark.parametrize("encoding", ["bit_packed"]) -def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding): - """Test that unsupported Parquet encodings do not work.""" - # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519 - with pytest.raises(BaseException, match=r"Encoding .*? is not supported"): - df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding)) - - -@pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"]) -def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding): - """Test that invalid Parquet encodings do not work.""" - with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"): - df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding)) - - -@pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"]) -def test_write_parquet_with_options_dictionary_encoding_fallback( - df, tmp_path, encoding -): - """Test that the dictionary encoding cannot be used as fallback in Parquet.""" - # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519 - with pytest.raises( - BaseException, match="Dictionary encoding can not be used as fallback encoding" - ): - df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding)) - - -def test_write_parquet_with_options_bloom_filter(df, tmp_path): - """Test Parquet files with and without (default) bloom filters. 
Since pyarrow does - not expose any information about bloom filters, the easiest way to confirm that they - are actually written is to compare the file size.""" - path_no_bloom_filter = tmp_path / "1" - path_bloom_filter = tmp_path / "2" - - df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions()) - df.write_parquet_with_options( - path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True) - ) - - size_no_bloom_filter = 0 - for file in path_no_bloom_filter.rglob("*.parquet"): - size_no_bloom_filter += Path(file).stat().st_size - - size_bloom_filter = 0 - for file in path_bloom_filter.rglob("*.parquet"): - size_bloom_filter += Path(file).stat().st_size - - assert size_no_bloom_filter < size_bloom_filter - - -def test_write_parquet_with_options_column_options(df, tmp_path): - """Test writing Parquet files with different options for each column, which replace - the global configs (when provided).""" - data = { - "a": [1, 2, 3], - "b": ["a", "b", "c"], - "c": [False, True, False], - "d": [1.01, 2.02, 3.03], - "e": [4, 5, 6], - } - - column_specific_options = { - "a": ParquetColumnOptions(statistics_enabled="none"), - "b": ParquetColumnOptions(encoding="plain", dictionary_enabled=False), - "c": ParquetColumnOptions( - compression="snappy", encoding="rle", dictionary_enabled=False - ), - "d": ParquetColumnOptions( - compression="zstd(6)", - encoding="byte_stream_split", - dictionary_enabled=False, - statistics_enabled="none", - ), - # column "e" will use the global configs - } - - results = { - "a": { - "statistics": False, - "compression": "brotli", - "encodings": ("PLAIN", "RLE", "RLE_DICTIONARY"), - }, - "b": { - "statistics": True, - "compression": "brotli", - "encodings": ("PLAIN", "RLE"), - }, - "c": { - "statistics": True, - "compression": "snappy", - "encodings": ("RLE",), - }, - "d": { - "statistics": False, - "compression": "zstd", - "encodings": ("RLE", "BYTE_STREAM_SPLIT"), - }, - "e": { - "statistics": True, - "compression": 
"brotli", - "encodings": ("PLAIN", "RLE", "RLE_DICTIONARY"), - }, - } - - ctx = SessionContext() - df = ctx.from_arrow(pa.record_batch(data)) - df.write_parquet_with_options( - tmp_path, - ParquetWriterOptions( - compression="brotli(8)", column_specific_options=column_specific_options - ), - ) - - for file in tmp_path.rglob("*.parquet"): - parquet = pq.ParquetFile(file) - metadata = parquet.metadata.to_dict() - - for row_group in metadata["row_groups"]: - for col in row_group["columns"]: - column_name = col["path_in_schema"] - result = results[column_name] - assert (col["statistics"] is not None) == result["statistics"] - assert col["compression"].lower() == result["compression"].lower() - assert col["encodings"] == result["encodings"] - - -def test_write_parquet_options(df, tmp_path): - options = ParquetWriterOptions(compression="gzip", compression_level=6) - df.write_parquet(str(tmp_path), options) - - result = pq.read_table(str(tmp_path)).to_pydict() - expected = df.to_pydict() - - assert result == expected - - -def test_write_parquet_options_error(df, tmp_path): - options = ParquetWriterOptions(compression="gzip", compression_level=6) - with pytest.raises(ValueError): - df.write_parquet(str(tmp_path), options, compression_level=1) - - -def test_write_table(ctx, df): - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3])], - names=["a"], - ) - - ctx.register_record_batches("t", [[batch]]) - - df = ctx.table("t").with_column("a", column("a") * literal(-1)) - - ctx.table("t").show() - - df.write_table("t") - result = ctx.table("t").sort(column("a")).collect()[0][0].to_pylist() - expected = [-3, -2, -1, 1, 2, 3] - - assert result == expected - - -def test_dataframe_export(df) -> None: - # Guarantees that we have the canonical implementation - # reading our dataframe export - table = pa.table(df) - assert table.num_columns == 3 - assert table.num_rows == 3 - - desired_schema = pa.schema([("a", pa.int64())]) - - # Verify we can request a schema - table = 
pa.table(df, schema=desired_schema) - assert table.num_columns == 1 - assert table.num_rows == 3 - - # Expect a table of nulls if the schema don't overlap - desired_schema = pa.schema([("g", pa.string())]) - table = pa.table(df, schema=desired_schema) - assert table.num_columns == 1 - assert table.num_rows == 3 - for i in range(3): - assert table[0][i].as_py() is None - - # Expect an error when we cannot convert schema - desired_schema = pa.schema([("a", pa.float32())]) - failed_convert = False - try: - table = pa.table(df, schema=desired_schema) - except Exception: - failed_convert = True - assert failed_convert - - # Expect an error when we have a not set non-nullable - desired_schema = pa.schema([("g", pa.string(), False)]) - failed_convert = False - try: - table = pa.table(df, schema=desired_schema) - except Exception: - failed_convert = True - assert failed_convert - - -def test_dataframe_transform(df): - def add_string_col(df_internal) -> DataFrame: - return df_internal.with_column("string_col", literal("string data")) - - def add_with_parameter(df_internal, value: Any) -> DataFrame: - return df_internal.with_column("new_col", literal(value)) - - df = df.transform(add_string_col).transform(add_with_parameter, 3) - - result = df.to_pydict() - - assert result["a"] == [1, 2, 3] - assert result["string_col"] == ["string data" for _i in range(3)] - assert result["new_col"] == [3 for _i in range(3)] - - -def test_dataframe_repr_html_structure(df, clean_formatter_state) -> None: - """Test that DataFrame._repr_html_ produces expected HTML output structure.""" - - output = df._repr_html_() - - # Since we've added a fair bit of processing to the html output, lets just verify - # the values we are expecting in the table exist. Use regex and ignore everything - # between the and . We also don't want the closing > on the - # td and th segments because that is where the formatting data is written. 
- - headers = ["a", "b", "c"] - headers = [f"{v}" for v in headers] - header_pattern = "(.*?)".join(headers) - header_matches = re.findall(header_pattern, output, re.DOTALL) - assert len(header_matches) == 1 - - # Update the pattern to handle values that may be wrapped in spans - body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] - - body_lines = [ - f"(?:]*?>)?{v}(?:)?" - for inner in body_data - for v in inner - ] - body_pattern = "(.*?)".join(body_lines) - - body_matches = re.findall(body_pattern, output, re.DOTALL) - - assert len(body_matches) == 1, "Expected pattern of values not found in HTML output" - - -def test_dataframe_repr_html_values(df, clean_formatter_state): - """Test that DataFrame._repr_html_ contains the expected data values.""" - html = df._repr_html_() - assert html is not None - - # Create a more flexible pattern that handles values being wrapped in spans - # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless - # of formatting - pattern = re.compile( - r"]*?>(?:]*?>)?1(?:)?.*?" - r"]*?>(?:]*?>)?4(?:)?.*?" - r"]*?>(?:]*?>)?8(?:)?.*?" - r"]*?>(?:]*?>)?2(?:)?.*?" - r"]*?>(?:]*?>)?5(?:)?.*?" - r"]*?>(?:]*?>)?5(?:)?.*?" - r"]*?>(?:]*?>)?3(?:)?.*?" - r"]*?>(?:]*?>)?6(?:)?.*?" - r"]*?>(?:]*?>)?8(?:)?", - re.DOTALL, - ) - - # Print debug info if the test fails - matches = re.findall(pattern, html) - if not matches: - print(f"HTML output snippet: {html[:500]}...") # noqa: T201 - - assert len(matches) > 0, "Expected pattern of values not found in HTML output" - - -def test_html_formatter_shared_styles(df, clean_formatter_state): - """Test that shared styles work correctly across multiple tables.""" - - # First, ensure we're using shared styles - configure_formatter(use_shared_styles=True) - - html_first = df._repr_html_() - html_second = df._repr_html_() - - assert " + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ +
+ + + + + + +
+ +
+ +

Search

+ + + + +

+ Searching for multiple words only shows matches that contain + all words. +

+ + +
+ + + + + + +
+ + +
+ + + +
+
+ +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/searchindex.js b/searchindex.js new file mode 100644 index 000000000..235db5bf1 --- /dev/null +++ b/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"API Reference": [[19, null]], "Additional Resources": [[37, "additional-resources"]], "Aggregate Functions": [[24, "aggregate-functions"], [31, "aggregate-functions"], [33, "aggregate-functions"]], "Aggregation": [[24, null]], "Alternative Approach": [[20, "alternative-approach"]], "Apache Iceberg": [[35, "apache-iceberg"]], "Arrays": [[26, "arrays"]], "Arrow": [[39, null]], "Attributes": [[4, "attributes"], [7, "attributes"], [12, "attributes"], [18, "attributes"]], "Available Functions": [[33, "available-functions"]], "Avro": [[40, null]], "Basic HTML Rendering": [[37, "basic-html-rendering"]], "Basic Operations": [[25, null]], "Benchmark Example": [[34, "benchmark-example"]], "Best Practices": [[37, "best-practices"]], "Boolean": [[26, "boolean"]], "Built-in Functions": [[36, "built-in-functions"]], "CSV": [[41, null]], "Casting": [[27, "casting"]], "Catalog": [[35, "catalog"]], "Classes": [[0, "classes"], [1, "classes"], [2, "classes"], [3, "classes"], [4, "classes"], [7, "classes"], [8, "classes"], [9, "classes"], [10, "classes"], [13, "classes"], [14, "classes"], [15, "classes"], [16, "classes"], [17, "classes"], [18, "classes"]], "Column": [[26, "column"]], "Column Names as Function Arguments": [[36, "column-names-as-function-arguments"]], "Column Selections": [[30, null]], "Common DataFrame Operations": [[36, "common-dataframe-operations"]], "Common Operations": [[28, null]], "Concepts": [[23, null]], "Conditional": [[27, "conditional"]], "Configuration": [[34, null]], "Configuring the HTML Formatter": [[37, "configuring-the-html-formatter"]], "Contextual Formatting": [[37, "contextual-formatting"]], "Core Classes": [[36, "core-classes"]], "Create in-memory": [[35, "create-in-memory"]], "Creating DataFrames": [[36, "creating-dataframes"]], "Creating 
a Custom Formatter": [[37, "creating-a-custom-formatter"]], "Custom Style Providers": [[37, "custom-style-providers"]], "Custom Table Provider": [[35, "custom-table-provider"], [45, null]], "Customizing HTML Rendering": [[37, "customizing-html-rendering"]], "Data Sources": [[35, null]], "DataFrame": [[23, "dataframe"]], "DataFrames": [[36, null]], "DataFusion 52.0.0": [[47, "datafusion-52-0-0"]], "DataFusion in Python": [[22, null]], "Delta Lake": [[35, "delta-lake"]], "Distinct": [[24, "distinct"]], "Duplicate Keys": [[29, "duplicate-keys"]], "Example": [[22, "example"]], "Execute as Stream": [[36, "execute-as-stream"]], "Exporting from DataFusion": [[39, "exporting-from-datafusion"]], "Expression Classes": [[36, "expression-classes"]], "Expressions": [[23, "expressions"], [26, null]], "FAQ": [[31, "faq"]], "Filter": [[24, "filter"]], "Full Join": [[29, "full-join"]], "Functions": [[3, "functions"], [4, "functions"], [5, "functions"], [7, "functions"], [11, "functions"], [18, "functions"], [26, "functions"], [27, null]], "Guidelines for Separating Python and Rust Code": [[21, "guidelines-for-separating-python-and-rust-code"]], "HTML Rendering": [[36, "html-rendering"]], "HTML Rendering in Jupyter": [[37, null]], "Handling Missing Values": [[27, "handling-missing-values"]], "How to develop": [[21, "how-to-develop"]], "IO": [[42, null]], "Implementation Details": [[20, "implementation-details"]], "Important Considerations": [[34, "important-considerations"]], "Importing to DataFusion": [[39, "importing-to-datafusion"]], "Improving Build Speed": [[21, "improving-build-speed"]], "Inner Join": [[29, "inner-join"]], "Inspiration from Arrow": [[20, "inspiration-from-arrow"]], "Install": [[22, "install"]], "Installation": [[38, "installation"]], "Introduction": [[21, null], [38, null]], "JSON": [[43, null]], "Joins": [[29, null]], "Left Anti Join": [[29, "left-anti-join"]], "Left Join": [[29, "left-join"]], "Left Semi Join": [[29, "left-semi-join"]], "Literal": [[26, 
"literal"]], "Local file": [[35, "local-file"]], "Managing Formatters": [[37, "managing-formatters"]], "Mathematical": [[27, "mathematical"]], "Maximizing CPU Usage": [[34, "maximizing-cpu-usage"]], "Memory and Display Controls": [[37, "memory-and-display-controls"]], "Module Contents": [[0, "module-contents"], [1, "module-contents"], [2, "module-contents"], [3, "module-contents"], [4, "module-contents"], [5, "module-contents"], [8, "module-contents"], [10, "module-contents"], [11, "module-contents"], [12, "module-contents"], [13, "module-contents"], [14, "module-contents"], [15, "module-contents"], [16, "module-contents"], [17, "module-contents"], [18, "module-contents"]], "Null Treatment": [[24, "null-treatment"], [33, "null-treatment"]], "Object Store": [[35, "object-store"]], "Ordering": [[24, "ordering"], [33, "ordering"]], "Other": [[27, "other"]], "Other DataFrame Libraries": [[35, "other-dataframe-libraries"]], "Overview": [[36, "overview"]], "Package Contents": [[7, "package-contents"], [9, "package-contents"]], "Parameterized queries": [[46, "parameterized-queries"]], "Parquet": [[44, null]], "Partitions": [[33, "partitions"]], "Performance Optimization with Shared Styles": [[37, "performance-optimization-with-shared-styles"]], "PyArrow": [[36, "pyarrow"]], "PyO3 class mutability guidelines": [[20, "pyo3-class-mutability-guidelines"]], "Python Extensions": [[20, null]], "Registering Views": [[32, null]], "Returns:": [[4, "returns"], [4, "id1"], [5, "returns"], [5, "id1"], [7, "returns"], [7, "id1"]], "Running & Installing pre-commit hooks": [[21, "running-installing-pre-commit-hooks"]], "SQL": [[46, null]], "Scalar Functions": [[31, "scalar-functions"]], "Session Context": [[23, "session-context"]], "Setting Parameters": [[24, "setting-parameters"], [33, "setting-parameters"]], "Status of Work": [[20, "status-of-work"]], "String": [[27, "string"]], "Structs": [[26, "structs"]], "Submodules": [[7, "submodules"], [9, "submodules"]], "Table Functions": [[31, 
"table-functions"]], "Temporal": [[27, "temporal"]], "Terminal Operations": [[36, "terminal-operations"]], "The FFI Approach": [[20, "the-ffi-approach"]], "The Primary Issue": [[20, "the-primary-issue"]], "UDWF options": [[31, "udwf-options"]], "Update Dependencies": [[21, "update-dependencies"]], "Upgrade Guides": [[47, null]], "User Defined Catalog and Schema": [[35, "user-defined-catalog-and-schema"]], "User-Defined Functions": [[31, null]], "Window Frame": [[33, "window-frame"]], "Window Functions": [[31, "window-functions"], [33, null]], "Zero-copy streaming to Arrow-based Python libraries": [[36, "zero-copy-streaming-to-arrow-based-python-libraries"]], "datafusion": [[7, null]], "datafusion.catalog": [[0, null]], "datafusion.context": [[1, null]], "datafusion.dataframe": [[2, null]], "datafusion.dataframe_formatter": [[3, null]], "datafusion.expr": [[4, null]], "datafusion.functions": [[5, null]], "datafusion.html_formatter": [[6, null]], "datafusion.input": [[9, null]], "datafusion.input.base": [[8, null]], "datafusion.input.location": [[10, null]], "datafusion.io": [[11, null]], "datafusion.object_store": [[12, null]], "datafusion.options": [[13, null]], "datafusion.plan": [[14, null]], "datafusion.record_batch": [[15, null]], "datafusion.substrait": [[16, null]], "datafusion.unparser": [[17, null]], "datafusion.user_defined": [[18, null]], "fill_null": [[27, "fill-null"]]}, "docnames": ["autoapi/datafusion/catalog/index", "autoapi/datafusion/context/index", "autoapi/datafusion/dataframe/index", "autoapi/datafusion/dataframe_formatter/index", "autoapi/datafusion/expr/index", "autoapi/datafusion/functions/index", "autoapi/datafusion/html_formatter/index", "autoapi/datafusion/index", "autoapi/datafusion/input/base/index", "autoapi/datafusion/input/index", "autoapi/datafusion/input/location/index", "autoapi/datafusion/io/index", "autoapi/datafusion/object_store/index", "autoapi/datafusion/options/index", "autoapi/datafusion/plan/index", 
"autoapi/datafusion/record_batch/index", "autoapi/datafusion/substrait/index", "autoapi/datafusion/unparser/index", "autoapi/datafusion/user_defined/index", "autoapi/index", "contributor-guide/ffi", "contributor-guide/introduction", "index", "user-guide/basics", "user-guide/common-operations/aggregations", "user-guide/common-operations/basic-info", "user-guide/common-operations/expressions", "user-guide/common-operations/functions", "user-guide/common-operations/index", "user-guide/common-operations/joins", "user-guide/common-operations/select-and-filter", "user-guide/common-operations/udf-and-udfa", "user-guide/common-operations/views", "user-guide/common-operations/windows", "user-guide/configuration", "user-guide/data-sources", "user-guide/dataframe/index", "user-guide/dataframe/rendering", "user-guide/introduction", "user-guide/io/arrow", "user-guide/io/avro", "user-guide/io/csv", "user-guide/io/index", "user-guide/io/json", "user-guide/io/parquet", "user-guide/io/table_provider", "user-guide/sql", "user-guide/upgrade-guides"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["autoapi/datafusion/catalog/index.rst", "autoapi/datafusion/context/index.rst", "autoapi/datafusion/dataframe/index.rst", "autoapi/datafusion/dataframe_formatter/index.rst", "autoapi/datafusion/expr/index.rst", "autoapi/datafusion/functions/index.rst", "autoapi/datafusion/html_formatter/index.rst", "autoapi/datafusion/index.rst", "autoapi/datafusion/input/base/index.rst", "autoapi/datafusion/input/index.rst", "autoapi/datafusion/input/location/index.rst", "autoapi/datafusion/io/index.rst", "autoapi/datafusion/object_store/index.rst", "autoapi/datafusion/options/index.rst", "autoapi/datafusion/plan/index.rst", 
"autoapi/datafusion/record_batch/index.rst", "autoapi/datafusion/substrait/index.rst", "autoapi/datafusion/unparser/index.rst", "autoapi/datafusion/user_defined/index.rst", "autoapi/index.rst", "contributor-guide/ffi.rst", "contributor-guide/introduction.rst", "index.rst", "user-guide/basics.rst", "user-guide/common-operations/aggregations.rst", "user-guide/common-operations/basic-info.rst", "user-guide/common-operations/expressions.rst", "user-guide/common-operations/functions.rst", "user-guide/common-operations/index.rst", "user-guide/common-operations/joins.rst", "user-guide/common-operations/select-and-filter.rst", "user-guide/common-operations/udf-and-udfa.rst", "user-guide/common-operations/views.rst", "user-guide/common-operations/windows.rst", "user-guide/configuration.rst", "user-guide/data-sources.rst", "user-guide/dataframe/index.rst", "user-guide/dataframe/rendering.rst", "user-guide/introduction.rst", "user-guide/io/arrow.rst", "user-guide/io/avro.rst", "user-guide/io/csv.rst", "user-guide/io/index.rst", "user-guide/io/json.rst", "user-guide/io/parquet.rst", "user-guide/io/table_provider.rst", "user-guide/sql.rst", "user-guide/upgrade-guides.rst"], "indexentries": {"__add__() (datafusion.expr method)": [[7, "datafusion.Expr.__add__", false]], "__add__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__add__", false]], "__aiter__() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.__aiter__", false]], "__aiter__() (datafusion.record_batch.recordbatchstream method)": [[15, "datafusion.record_batch.RecordBatchStream.__aiter__", false]], "__aiter__() (datafusion.recordbatchstream method)": [[7, "datafusion.RecordBatchStream.__aiter__", false]], "__and__() (datafusion.expr method)": [[7, "datafusion.Expr.__and__", false]], "__and__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__and__", false]], "__anext__() (datafusion.record_batch.recordbatchstream method)": [[15, 
"datafusion.record_batch.RecordBatchStream.__anext__", false]], "__anext__() (datafusion.recordbatchstream method)": [[7, "datafusion.RecordBatchStream.__anext__", false]], "__arrow_c_array__() (datafusion.context.arrowarrayexportable method)": [[1, "datafusion.context.ArrowArrayExportable.__arrow_c_array__", false]], "__arrow_c_array__() (datafusion.record_batch.recordbatch method)": [[15, "datafusion.record_batch.RecordBatch.__arrow_c_array__", false]], "__arrow_c_array__() (datafusion.recordbatch method)": [[7, "datafusion.RecordBatch.__arrow_c_array__", false]], "__arrow_c_stream__() (datafusion.context.arrowstreamexportable method)": [[1, "datafusion.context.ArrowStreamExportable.__arrow_c_stream__", false]], "__arrow_c_stream__() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.__arrow_c_stream__", false]], "__call__() (datafusion.aggregateudf method)": [[7, "datafusion.AggregateUDF.__call__", false]], "__call__() (datafusion.dataframe_formatter.cellformatter method)": [[3, "datafusion.dataframe_formatter.CellFormatter.__call__", false]], "__call__() (datafusion.scalarudf method)": [[7, "datafusion.ScalarUDF.__call__", false]], "__call__() (datafusion.tablefunction method)": [[7, "datafusion.TableFunction.__call__", false]], "__call__() (datafusion.user_defined.aggregateudf method)": [[18, "datafusion.user_defined.AggregateUDF.__call__", false]], "__call__() (datafusion.user_defined.scalarudf method)": [[18, "datafusion.user_defined.ScalarUDF.__call__", false]], "__call__() (datafusion.user_defined.tablefunction method)": [[18, "datafusion.user_defined.TableFunction.__call__", false]], "__call__() (datafusion.user_defined.windowudf method)": [[18, "datafusion.user_defined.WindowUDF.__call__", false]], "__call__() (datafusion.windowudf method)": [[7, "datafusion.WindowUDF.__call__", false]], "__datafusion_aggregate_udf__() (datafusion.user_defined.aggregateudfexportable method)": [[18, 
"datafusion.user_defined.AggregateUDFExportable.__datafusion_aggregate_udf__", false]], "__datafusion_logical_extension_codec__() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.__datafusion_logical_extension_codec__", false]], "__datafusion_scalar_udf__() (datafusion.user_defined.scalarudfexportable method)": [[18, "datafusion.user_defined.ScalarUDFExportable.__datafusion_scalar_udf__", false]], "__datafusion_table_provider__() (datafusion.context.tableproviderexportable method)": [[1, "datafusion.context.TableProviderExportable.__datafusion_table_provider__", false]], "__datafusion_task_context_provider__() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.__datafusion_task_context_provider__", false]], "__datafusion_window_udf__() (datafusion.user_defined.windowudfexportable method)": [[18, "datafusion.user_defined.WindowUDFExportable.__datafusion_window_udf__", false]], "__eq__() (datafusion.expr method)": [[7, "datafusion.Expr.__eq__", false]], "__eq__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__eq__", false]], "__eq__() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.__eq__", false]], "__eq__() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.__eq__", false]], "__ge__() (datafusion.expr method)": [[7, "datafusion.Expr.__ge__", false]], "__ge__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__ge__", false]], "__getitem__() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.__getitem__", false]], "__getitem__() (datafusion.expr method)": [[7, "datafusion.Expr.__getitem__", false]], "__getitem__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__getitem__", false]], "__gt__() (datafusion.expr method)": [[7, "datafusion.Expr.__gt__", false]], "__gt__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__gt__", false]], "__invert__() (datafusion.expr method)": [[7, 
"datafusion.Expr.__invert__", false]], "__invert__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__invert__", false]], "__iter__() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.__iter__", false]], "__iter__() (datafusion.record_batch.recordbatchstream method)": [[15, "datafusion.record_batch.RecordBatchStream.__iter__", false]], "__iter__() (datafusion.recordbatchstream method)": [[7, "datafusion.RecordBatchStream.__iter__", false]], "__le__() (datafusion.expr method)": [[7, "datafusion.Expr.__le__", false]], "__le__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__le__", false]], "__lt__() (datafusion.expr method)": [[7, "datafusion.Expr.__lt__", false]], "__lt__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__lt__", false]], "__mod__() (datafusion.expr method)": [[7, "datafusion.Expr.__mod__", false]], "__mod__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__mod__", false]], "__mul__() (datafusion.expr method)": [[7, "datafusion.Expr.__mul__", false]], "__mul__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__mul__", false]], "__ne__() (datafusion.expr method)": [[7, "datafusion.Expr.__ne__", false]], "__ne__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__ne__", false]], "__next__() (datafusion.record_batch.recordbatchstream method)": [[15, "datafusion.record_batch.RecordBatchStream.__next__", false]], "__next__() (datafusion.recordbatchstream method)": [[7, "datafusion.RecordBatchStream.__next__", false]], "__or__() (datafusion.expr method)": [[7, "datafusion.Expr.__or__", false]], "__or__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__or__", false]], "__radd__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__radd__", false]], "__radd__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__radd__", false]], "__rand__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__rand__", false]], "__rand__ 
(datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__rand__", false]], "__repr__() (datafusion.aggregateudf method)": [[7, "datafusion.AggregateUDF.__repr__", false]], "__repr__() (datafusion.catalog method)": [[7, "datafusion.Catalog.__repr__", false]], "__repr__() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.__repr__", false]], "__repr__() (datafusion.catalog.cataloglist method)": [[0, "datafusion.catalog.CatalogList.__repr__", false]], "__repr__() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.__repr__", false]], "__repr__() (datafusion.catalog.table method)": [[0, "datafusion.catalog.Table.__repr__", false]], "__repr__() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.__repr__", false]], "__repr__() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.__repr__", false]], "__repr__() (datafusion.executionplan method)": [[7, "datafusion.ExecutionPlan.__repr__", false]], "__repr__() (datafusion.expr method)": [[7, "datafusion.Expr.__repr__", false]], "__repr__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__repr__", false]], "__repr__() (datafusion.expr.sortexpr method)": [[4, "datafusion.expr.SortExpr.__repr__", false]], "__repr__() (datafusion.expr.windowframe method)": [[4, "datafusion.expr.WindowFrame.__repr__", false]], "__repr__() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.__repr__", false]], "__repr__() (datafusion.plan.executionplan method)": [[14, "datafusion.plan.ExecutionPlan.__repr__", false]], "__repr__() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.__repr__", false]], "__repr__() (datafusion.scalarudf method)": [[7, "datafusion.ScalarUDF.__repr__", false]], "__repr__() (datafusion.table method)": [[7, "datafusion.Table.__repr__", false]], "__repr__() (datafusion.tablefunction method)": [[7, "datafusion.TableFunction.__repr__", false]], "__repr__() 
(datafusion.user_defined.aggregateudf method)": [[18, "datafusion.user_defined.AggregateUDF.__repr__", false]], "__repr__() (datafusion.user_defined.scalarudf method)": [[18, "datafusion.user_defined.ScalarUDF.__repr__", false]], "__repr__() (datafusion.user_defined.tablefunction method)": [[18, "datafusion.user_defined.TableFunction.__repr__", false]], "__repr__() (datafusion.user_defined.windowudf method)": [[18, "datafusion.user_defined.WindowUDF.__repr__", false]], "__repr__() (datafusion.windowframe method)": [[7, "datafusion.WindowFrame.__repr__", false]], "__repr__() (datafusion.windowudf method)": [[7, "datafusion.WindowUDF.__repr__", false]], "__richcmp__() (datafusion.expr method)": [[7, "datafusion.Expr.__richcmp__", false]], "__richcmp__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__richcmp__", false]], "__rmod__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__rmod__", false]], "__rmod__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__rmod__", false]], "__rmul__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__rmul__", false]], "__rmul__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__rmul__", false]], "__ror__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__ror__", false]], "__ror__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__ror__", false]], "__rsub__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__rsub__", false]], "__rsub__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__rsub__", false]], "__rtruediv__ (datafusion.expr attribute)": [[7, "datafusion.Expr.__rtruediv__", false]], "__rtruediv__ (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.__rtruediv__", false]], "__slots__ (datafusion.catalog.table attribute)": [[0, "datafusion.catalog.Table.__slots__", false]], "__slots__ (datafusion.table attribute)": [[7, "datafusion.Table.__slots__", false]], "__str__() (datafusion.user_defined.volatility method)": [[18, 
"datafusion.user_defined.Volatility.__str__", false]], "__sub__() (datafusion.expr method)": [[7, "datafusion.Expr.__sub__", false]], "__sub__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__sub__", false]], "__truediv__() (datafusion.expr method)": [[7, "datafusion.Expr.__truediv__", false]], "__truediv__() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.__truediv__", false]], "_build_expandable_cell() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_expandable_cell", false]], "_build_html_footer() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_html_footer", false]], "_build_html_header() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_html_header", false]], "_build_regular_cell() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_regular_cell", false]], "_build_table_body() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_table_body", false]], "_build_table_container_start() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_table_container_start", false]], "_build_table_header() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._build_table_header", false]], "_convert_file_sort_order() (datafusion.context.sessioncontext static method)": [[1, "datafusion.context.SessionContext._convert_file_sort_order", false]], "_convert_table_partition_cols() (datafusion.context.sessioncontext static method)": [[1, "datafusion.context.SessionContext._convert_table_partition_cols", false]], 
"_create_table_udf() (datafusion.tablefunction static method)": [[7, "datafusion.TableFunction._create_table_udf", false]], "_create_table_udf() (datafusion.user_defined.tablefunction static method)": [[18, "datafusion.user_defined.TableFunction._create_table_udf", false]], "_create_table_udf_decorator() (datafusion.tablefunction static method)": [[7, "datafusion.TableFunction._create_table_udf_decorator", false]], "_create_table_udf_decorator() (datafusion.user_defined.tablefunction static method)": [[18, "datafusion.user_defined.TableFunction._create_table_udf_decorator", false]], "_create_window_udf() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF._create_window_udf", false]], "_create_window_udf() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF._create_window_udf", false]], "_create_window_udf_decorator() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF._create_window_udf_decorator", false]], "_create_window_udf_decorator() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF._create_window_udf_decorator", false]], "_custom_cell_builder (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._custom_cell_builder", false]], "_custom_header_builder (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._custom_header_builder", false]], "_default_formatter (datafusion.dataframe_formatter.formattermanager attribute)": [[3, "datafusion.dataframe_formatter.FormatterManager._default_formatter", false]], "_format_cell_value() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._format_cell_value", false]], "_get_cell_value() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, 
"datafusion.dataframe_formatter.DataFrameHtmlFormatter._get_cell_value", false]], "_get_default_css() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._get_default_css", false]], "_get_default_name() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF._get_default_name", false]], "_get_default_name() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF._get_default_name", false]], "_get_javascript() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._get_javascript", false]], "_inner (datafusion.catalog.table attribute)": [[0, "datafusion.catalog.Table._inner", false]], "_inner (datafusion.table attribute)": [[7, "datafusion.Table._inner", false]], "_is_pycapsule() (in module datafusion.user_defined)": [[18, "datafusion.user_defined._is_pycapsule", false]], "_max_rows (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._max_rows", false]], "_normalize_input_types() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF._normalize_input_types", false]], "_normalize_input_types() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF._normalize_input_types", false]], "_null_treatment (datafusion.expr.window attribute)": [[4, "datafusion.expr.Window._null_treatment", false]], "_order_by (datafusion.expr.window attribute)": [[4, "datafusion.expr.Window._order_by", false]], "_partition_by (datafusion.expr.window attribute)": [[4, "datafusion.expr.Window._partition_by", false]], "_r (in module datafusion.user_defined)": [[18, "datafusion.user_defined._R", false]], "_raw_plan (datafusion.executionplan attribute)": [[7, "datafusion.ExecutionPlan._raw_plan", false]], "_raw_plan (datafusion.logicalplan attribute)": [[7, 
"datafusion.LogicalPlan._raw_plan", false]], "_raw_plan (datafusion.plan.executionplan attribute)": [[14, "datafusion.plan.ExecutionPlan._raw_plan", false]], "_raw_plan (datafusion.plan.logicalplan attribute)": [[14, "datafusion.plan.LogicalPlan._raw_plan", false]], "_raw_schema (datafusion.catalog.schema attribute)": [[0, "datafusion.catalog.Schema._raw_schema", false]], "_raw_write_options (datafusion.dataframe.dataframewriteoptions attribute)": [[2, "datafusion.dataframe.DataFrameWriteOptions._raw_write_options", false]], "_raw_write_options (datafusion.dataframewriteoptions attribute)": [[7, "datafusion.DataFrameWriteOptions._raw_write_options", false]], "_refresh_formatter_reference() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter._refresh_formatter_reference", false]], "_repr_html_() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame._repr_html_", false]], "_to_pyarrow_types (datafusion.expr attribute)": [[7, "datafusion.Expr._to_pyarrow_types", false]], "_to_pyarrow_types (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr._to_pyarrow_types", false]], "_type_formatters (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter._type_formatters", false]], "_udaf (datafusion.aggregateudf attribute)": [[7, "datafusion.AggregateUDF._udaf", false]], "_udaf (datafusion.user_defined.aggregateudf attribute)": [[18, "datafusion.user_defined.AggregateUDF._udaf", false]], "_udf (datafusion.scalarudf attribute)": [[7, "datafusion.ScalarUDF._udf", false]], "_udf (datafusion.user_defined.scalarudf attribute)": [[18, "datafusion.user_defined.ScalarUDF._udf", false]], "_udtf (datafusion.tablefunction attribute)": [[7, "datafusion.TableFunction._udtf", false]], "_udtf (datafusion.user_defined.tablefunction attribute)": [[18, "datafusion.user_defined.TableFunction._udtf", false]], "_udwf (datafusion.user_defined.windowudf attribute)": 
[[18, "datafusion.user_defined.WindowUDF._udwf", false]], "_udwf (datafusion.windowudf attribute)": [[7, "datafusion.WindowUDF._udwf", false]], "_validate_bool() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter._validate_bool", false]], "_validate_formatter_parameters() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter._validate_formatter_parameters", false]], "_validate_positive_int() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter._validate_positive_int", false]], "_window_frame (datafusion.expr.window attribute)": [[4, "datafusion.expr.Window._window_frame", false]], "abs() (datafusion.expr method)": [[7, "datafusion.Expr.abs", false]], "abs() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.abs", false]], "abs() (in module datafusion.functions)": [[5, "datafusion.functions.abs", false]], "accumulator (class in datafusion)": [[7, "datafusion.Accumulator", false]], "accumulator (class in datafusion.user_defined)": [[18, "datafusion.user_defined.Accumulator", false]], "acos() (datafusion.expr method)": [[7, "datafusion.Expr.acos", false]], "acos() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.acos", false]], "acos() (in module datafusion.functions)": [[5, "datafusion.functions.acos", false]], "acosh() (datafusion.expr method)": [[7, "datafusion.Expr.acosh", false]], "acosh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.acosh", false]], "acosh() (in module datafusion.functions)": [[5, "datafusion.functions.acosh", false]], "aggregate (in module datafusion.expr)": [[4, "datafusion.expr.Aggregate", false]], "aggregate() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.aggregate", false]], "aggregatefunction (in module datafusion.expr)": [[4, "datafusion.expr.AggregateFunction", false]], "aggregateudf (class in datafusion)": [[7, "datafusion.AggregateUDF", false]], "aggregateudf (class in 
datafusion.user_defined)": [[18, "datafusion.user_defined.AggregateUDF", false]], "aggregateudfexportable (class in datafusion.user_defined)": [[18, "datafusion.user_defined.AggregateUDFExportable", false]], "alias (in module datafusion.expr)": [[4, "datafusion.expr.Alias", false]], "alias() (datafusion.expr method)": [[7, "datafusion.Expr.alias", false]], "alias() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.alias", false]], "alias() (in module datafusion.functions)": [[5, "datafusion.functions.alias", false]], "allow_single_file_parallelism (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.allow_single_file_parallelism", false]], "allow_single_file_parallelism (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.allow_single_file_parallelism", false]], "amazons3 (in module datafusion.object_store)": [[12, "datafusion.object_store.AmazonS3", false]], "analyze (in module datafusion.expr)": [[4, "datafusion.expr.Analyze", false]], "append (datafusion.dataframe.insertop attribute)": [[2, "datafusion.dataframe.InsertOp.APPEND", false]], "append (datafusion.insertop attribute)": [[7, "datafusion.InsertOp.APPEND", false]], "approx_distinct() (in module datafusion.functions)": [[5, "datafusion.functions.approx_distinct", false]], "approx_median() (in module datafusion.functions)": [[5, "datafusion.functions.approx_median", false]], "approx_percentile_cont() (in module datafusion.functions)": [[5, "datafusion.functions.approx_percentile_cont", false]], "approx_percentile_cont_with_weight() (in module datafusion.functions)": [[5, "datafusion.functions.approx_percentile_cont_with_weight", false]], "array() (in module datafusion.functions)": [[5, "datafusion.functions.array", false]], "array_agg() (in module datafusion.functions)": [[5, "datafusion.functions.array_agg", false]], "array_append() (in module datafusion.functions)": [[5, "datafusion.functions.array_append", 
false]], "array_cat() (in module datafusion.functions)": [[5, "datafusion.functions.array_cat", false]], "array_concat() (in module datafusion.functions)": [[5, "datafusion.functions.array_concat", false]], "array_dims() (datafusion.expr method)": [[7, "datafusion.Expr.array_dims", false]], "array_dims() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_dims", false]], "array_dims() (in module datafusion.functions)": [[5, "datafusion.functions.array_dims", false]], "array_distinct() (datafusion.expr method)": [[7, "datafusion.Expr.array_distinct", false]], "array_distinct() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_distinct", false]], "array_distinct() (in module datafusion.functions)": [[5, "datafusion.functions.array_distinct", false]], "array_element() (in module datafusion.functions)": [[5, "datafusion.functions.array_element", false]], "array_empty() (datafusion.expr method)": [[7, "datafusion.Expr.array_empty", false]], "array_empty() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_empty", false]], "array_empty() (in module datafusion.functions)": [[5, "datafusion.functions.array_empty", false]], "array_except() (in module datafusion.functions)": [[5, "datafusion.functions.array_except", false]], "array_extract() (in module datafusion.functions)": [[5, "datafusion.functions.array_extract", false]], "array_has() (in module datafusion.functions)": [[5, "datafusion.functions.array_has", false]], "array_has_all() (in module datafusion.functions)": [[5, "datafusion.functions.array_has_all", false]], "array_has_any() (in module datafusion.functions)": [[5, "datafusion.functions.array_has_any", false]], "array_indexof() (in module datafusion.functions)": [[5, "datafusion.functions.array_indexof", false]], "array_intersect() (in module datafusion.functions)": [[5, "datafusion.functions.array_intersect", false]], "array_join() (in module datafusion.functions)": [[5, "datafusion.functions.array_join", false]], 
"array_length() (datafusion.expr method)": [[7, "datafusion.Expr.array_length", false]], "array_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_length", false]], "array_length() (in module datafusion.functions)": [[5, "datafusion.functions.array_length", false]], "array_ndims() (datafusion.expr method)": [[7, "datafusion.Expr.array_ndims", false]], "array_ndims() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_ndims", false]], "array_ndims() (in module datafusion.functions)": [[5, "datafusion.functions.array_ndims", false]], "array_pop_back() (datafusion.expr method)": [[7, "datafusion.Expr.array_pop_back", false]], "array_pop_back() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_pop_back", false]], "array_pop_back() (in module datafusion.functions)": [[5, "datafusion.functions.array_pop_back", false]], "array_pop_front() (datafusion.expr method)": [[7, "datafusion.Expr.array_pop_front", false]], "array_pop_front() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.array_pop_front", false]], "array_pop_front() (in module datafusion.functions)": [[5, "datafusion.functions.array_pop_front", false]], "array_position() (in module datafusion.functions)": [[5, "datafusion.functions.array_position", false]], "array_positions() (in module datafusion.functions)": [[5, "datafusion.functions.array_positions", false]], "array_prepend() (in module datafusion.functions)": [[5, "datafusion.functions.array_prepend", false]], "array_push_back() (in module datafusion.functions)": [[5, "datafusion.functions.array_push_back", false]], "array_push_front() (in module datafusion.functions)": [[5, "datafusion.functions.array_push_front", false]], "array_remove() (in module datafusion.functions)": [[5, "datafusion.functions.array_remove", false]], "array_remove_all() (in module datafusion.functions)": [[5, "datafusion.functions.array_remove_all", false]], "array_remove_n() (in module datafusion.functions)": [[5, 
"datafusion.functions.array_remove_n", false]], "array_repeat() (in module datafusion.functions)": [[5, "datafusion.functions.array_repeat", false]], "array_replace() (in module datafusion.functions)": [[5, "datafusion.functions.array_replace", false]], "array_replace_all() (in module datafusion.functions)": [[5, "datafusion.functions.array_replace_all", false]], "array_replace_n() (in module datafusion.functions)": [[5, "datafusion.functions.array_replace_n", false]], "array_resize() (in module datafusion.functions)": [[5, "datafusion.functions.array_resize", false]], "array_slice() (in module datafusion.functions)": [[5, "datafusion.functions.array_slice", false]], "array_sort() (in module datafusion.functions)": [[5, "datafusion.functions.array_sort", false]], "array_to_string() (in module datafusion.functions)": [[5, "datafusion.functions.array_to_string", false]], "array_union() (in module datafusion.functions)": [[5, "datafusion.functions.array_union", false]], "arrow_cast() (in module datafusion.functions)": [[5, "datafusion.functions.arrow_cast", false]], "arrow_typeof() (datafusion.expr method)": [[7, "datafusion.Expr.arrow_typeof", false]], "arrow_typeof() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.arrow_typeof", false]], "arrow_typeof() (in module datafusion.functions)": [[5, "datafusion.functions.arrow_typeof", false]], "arrowarrayexportable (class in datafusion.context)": [[1, "datafusion.context.ArrowArrayExportable", false]], "arrowstreamexportable (class in datafusion.context)": [[1, "datafusion.context.ArrowStreamExportable", false]], "ascending() (datafusion.expr.sortexpr method)": [[4, "datafusion.expr.SortExpr.ascending", false]], "ascii() (datafusion.expr method)": [[7, "datafusion.Expr.ascii", false]], "ascii() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.ascii", false]], "ascii() (in module datafusion.functions)": [[5, "datafusion.functions.ascii", false]], "asin() (datafusion.expr method)": [[7, 
"datafusion.Expr.asin", false]], "asin() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.asin", false]], "asin() (in module datafusion.functions)": [[5, "datafusion.functions.asin", false]], "asinh() (datafusion.expr method)": [[7, "datafusion.Expr.asinh", false]], "asinh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.asinh", false]], "asinh() (in module datafusion.functions)": [[5, "datafusion.functions.asinh", false]], "atan() (datafusion.expr method)": [[7, "datafusion.Expr.atan", false]], "atan() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.atan", false]], "atan() (in module datafusion.functions)": [[5, "datafusion.functions.atan", false]], "atan2() (in module datafusion.functions)": [[5, "datafusion.functions.atan2", false]], "atanh() (datafusion.expr method)": [[7, "datafusion.Expr.atanh", false]], "atanh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.atanh", false]], "atanh() (in module datafusion.functions)": [[5, "datafusion.functions.atanh", false]], "avg() (in module datafusion.functions)": [[5, "datafusion.functions.avg", false]], "baseinputsource (class in datafusion.input.base)": [[8, "datafusion.input.base.BaseInputSource", false]], "between (in module datafusion.expr)": [[4, "datafusion.expr.Between", false]], "between() (datafusion.expr method)": [[7, "datafusion.Expr.between", false]], "between() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.between", false]], "binaryexpr (in module datafusion.expr)": [[4, "datafusion.expr.BinaryExpr", false]], "bit_and() (in module datafusion.functions)": [[5, "datafusion.functions.bit_and", false]], "bit_length() (datafusion.expr method)": [[7, "datafusion.Expr.bit_length", false]], "bit_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.bit_length", false]], "bit_length() (in module datafusion.functions)": [[5, "datafusion.functions.bit_length", false]], "bit_or() (in module datafusion.functions)": [[5, 
"datafusion.functions.bit_or", false]], "bit_xor() (in module datafusion.functions)": [[5, "datafusion.functions.bit_xor", false]], "bloom_filter_enabled (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.bloom_filter_enabled", false]], "bloom_filter_enabled (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.bloom_filter_enabled", false]], "bloom_filter_fpp (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.bloom_filter_fpp", false]], "bloom_filter_fpp (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.bloom_filter_fpp", false]], "bloom_filter_fpp (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.bloom_filter_fpp", false]], "bloom_filter_fpp (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.bloom_filter_fpp", false]], "bloom_filter_ndv (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.bloom_filter_ndv", false]], "bloom_filter_ndv (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.bloom_filter_ndv", false]], "bloom_filter_ndv (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.bloom_filter_ndv", false]], "bloom_filter_ndv (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.bloom_filter_ndv", false]], "bloom_filter_on_write (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.bloom_filter_on_write", false]], "bloom_filter_on_write (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.bloom_filter_on_write", false]], "bool_and() (in module datafusion.functions)": [[5, "datafusion.functions.bool_and", false]], "bool_or() (in module datafusion.functions)": [[5, 
"datafusion.functions.bool_or", false]], "brotli (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.BROTLI", false]], "btrim() (datafusion.expr method)": [[7, "datafusion.Expr.btrim", false]], "btrim() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.btrim", false]], "btrim() (in module datafusion.functions)": [[5, "datafusion.functions.btrim", false]], "build_table() (datafusion.input.base.baseinputsource method)": [[8, "datafusion.input.base.BaseInputSource.build_table", false]], "build_table() (datafusion.input.location.locationinputplugin method)": [[10, "datafusion.input.location.LocationInputPlugin.build_table", false]], "build_table() (datafusion.input.locationinputplugin method)": [[9, "datafusion.input.LocationInputPlugin.build_table", false]], "cache() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.cache", false]], "canonical_name() (datafusion.expr method)": [[7, "datafusion.Expr.canonical_name", false]], "canonical_name() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.canonical_name", false]], "cardinality() (datafusion.expr method)": [[7, "datafusion.Expr.cardinality", false]], "cardinality() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cardinality", false]], "cardinality() (in module datafusion.functions)": [[5, "datafusion.functions.cardinality", false]], "case (in module datafusion.expr)": [[4, "datafusion.expr.Case", false]], "case() (in module datafusion.functions)": [[5, "datafusion.functions.case", false]], "case_builder (datafusion.expr.casebuilder attribute)": [[4, "datafusion.expr.CaseBuilder.case_builder", false]], "casebuilder (class in datafusion.expr)": [[4, "datafusion.expr.CaseBuilder", false]], "cast (in module datafusion.expr)": [[4, "datafusion.expr.Cast", false]], "cast() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.cast", false]], "cast() (datafusion.expr method)": [[7, "datafusion.Expr.cast", 
false]], "cast() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cast", false]], "catalog (class in datafusion)": [[7, "datafusion.Catalog", false]], "catalog (class in datafusion.catalog)": [[0, "datafusion.catalog.Catalog", false]], "catalog (datafusion.catalog attribute)": [[7, "datafusion.Catalog.catalog", false]], "catalog (datafusion.catalog.catalog attribute)": [[0, "datafusion.catalog.Catalog.catalog", false]], "catalog() (datafusion.catalog.cataloglist method)": [[0, "datafusion.catalog.CatalogList.catalog", false]], "catalog() (datafusion.catalog.catalogproviderlist method)": [[0, "datafusion.catalog.CatalogProviderList.catalog", false]], "catalog() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.catalog", false]], "catalog_list (datafusion.catalog.cataloglist attribute)": [[0, "datafusion.catalog.CatalogList.catalog_list", false]], "catalog_names() (datafusion.catalog.cataloglist method)": [[0, "datafusion.catalog.CatalogList.catalog_names", false]], "catalog_names() (datafusion.catalog.catalogproviderlist method)": [[0, "datafusion.catalog.CatalogProviderList.catalog_names", false]], "catalog_names() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.catalog_names", false]], "cataloglist (class in datafusion.catalog)": [[0, "datafusion.catalog.CatalogList", false]], "catalogprovider (class in datafusion.catalog)": [[0, "datafusion.catalog.CatalogProvider", false]], "catalogproviderlist (class in datafusion.catalog)": [[0, "datafusion.catalog.CatalogProviderList", false]], "cbrt() (datafusion.expr method)": [[7, "datafusion.Expr.cbrt", false]], "cbrt() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cbrt", false]], "cbrt() (in module datafusion.functions)": [[5, "datafusion.functions.cbrt", false]], "ceil() (datafusion.expr method)": [[7, "datafusion.Expr.ceil", false]], "ceil() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.ceil", false]], "ceil() 
(in module datafusion.functions)": [[5, "datafusion.functions.ceil", false]], "cellformatter (class in datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.CellFormatter", false]], "char_length() (datafusion.expr method)": [[7, "datafusion.Expr.char_length", false]], "char_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.char_length", false]], "char_length() (in module datafusion.functions)": [[5, "datafusion.functions.char_length", false]], "character_length() (datafusion.expr method)": [[7, "datafusion.Expr.character_length", false]], "character_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.character_length", false]], "character_length() (in module datafusion.functions)": [[5, "datafusion.functions.character_length", false]], "children() (datafusion.executionplan method)": [[7, "datafusion.ExecutionPlan.children", false]], "children() (datafusion.plan.executionplan method)": [[14, "datafusion.plan.ExecutionPlan.children", false]], "chr() (datafusion.expr method)": [[7, "datafusion.Expr.chr", false]], "chr() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.chr", false]], "chr() (in module datafusion.functions)": [[5, "datafusion.functions.chr", false]], "coalesce() (in module datafusion.functions)": [[5, "datafusion.functions.coalesce", false]], "col (in module datafusion)": [[7, "datafusion.col", false]], "col() (in module datafusion.functions)": [[5, "datafusion.functions.col", false]], "collect() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.collect", false]], "collect_column() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.collect_column", false]], "collect_partitioned() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.collect_partitioned", false]], "column (in module datafusion)": [[7, "datafusion.column", false]], "column (in module datafusion.expr)": [[4, "datafusion.expr.Column", false]], 
"column() (datafusion.expr static method)": [[7, "datafusion.Expr.column", false]], "column() (datafusion.expr.expr static method)": [[4, "datafusion.expr.Expr.column", false]], "column_index_truncate_length (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.column_index_truncate_length", false]], "column_index_truncate_length (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.column_index_truncate_length", false]], "column_name() (datafusion.expr method)": [[7, "datafusion.Expr.column_name", false]], "column_name() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.column_name", false]], "column_specific_options (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.column_specific_options", false]], "column_specific_options (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.column_specific_options", false]], "comment (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.comment", false]], "comment (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.comment", false]], "compression (class in datafusion.dataframe)": [[2, "datafusion.dataframe.Compression", false]], "compression (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.compression", false]], "compression (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.compression", false]], "concat() (in module datafusion.functions)": [[5, "datafusion.functions.concat", false]], "concat_ws() (in module datafusion.functions)": [[5, "datafusion.functions.concat_ws", false]], "config_internal (datafusion.context.runtimeenvbuilder attribute)": [[1, "datafusion.context.RuntimeEnvBuilder.config_internal", false]], "config_internal (datafusion.context.sessionconfig attribute)": [[1, 
"datafusion.context.SessionConfig.config_internal", false]], "config_internal (datafusion.runtimeenvbuilder attribute)": [[7, "datafusion.RuntimeEnvBuilder.config_internal", false]], "config_internal (datafusion.sessionconfig attribute)": [[7, "datafusion.SessionConfig.config_internal", false]], "configure_formatter() (in module datafusion)": [[7, "datafusion.configure_formatter", false]], "configure_formatter() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.configure_formatter", false]], "consumer (class in datafusion.substrait)": [[16, "datafusion.substrait.Consumer", false]], "copyto (in module datafusion.expr)": [[4, "datafusion.expr.CopyTo", false]], "corr() (in module datafusion.functions)": [[5, "datafusion.functions.corr", false]], "cos() (datafusion.expr method)": [[7, "datafusion.Expr.cos", false]], "cos() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cos", false]], "cos() (in module datafusion.functions)": [[5, "datafusion.functions.cos", false]], "cosh() (datafusion.expr method)": [[7, "datafusion.Expr.cosh", false]], "cosh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cosh", false]], "cosh() (in module datafusion.functions)": [[5, "datafusion.functions.cosh", false]], "cot() (datafusion.expr method)": [[7, "datafusion.Expr.cot", false]], "cot() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.cot", false]], "cot() (in module datafusion.functions)": [[5, "datafusion.functions.cot", false]], "count() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.count", false]], "count() (in module datafusion.functions)": [[5, "datafusion.functions.count", false]], "count_star() (in module datafusion.functions)": [[5, "datafusion.functions.count_star", false]], "covar() (in module datafusion.functions)": [[5, "datafusion.functions.covar", false]], "covar_pop() (in module datafusion.functions)": [[5, "datafusion.functions.covar_pop", false]], "covar_samp() (in module 
datafusion.functions)": [[5, "datafusion.functions.covar_samp", false]], "create_dataframe() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.create_dataframe", false]], "create_dataframe_from_logical_plan() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.create_dataframe_from_logical_plan", false]], "createcatalog (in module datafusion.expr)": [[4, "datafusion.expr.CreateCatalog", false]], "createcatalogschema (in module datafusion.expr)": [[4, "datafusion.expr.CreateCatalogSchema", false]], "created_by (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.created_by", false]], "created_by (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.created_by", false]], "createexternaltable (in module datafusion.expr)": [[4, "datafusion.expr.CreateExternalTable", false]], "createfunction (in module datafusion.expr)": [[4, "datafusion.expr.CreateFunction", false]], "createfunctionbody (in module datafusion.expr)": [[4, "datafusion.expr.CreateFunctionBody", false]], "createindex (in module datafusion.expr)": [[4, "datafusion.expr.CreateIndex", false]], "creatememorytable (in module datafusion.expr)": [[4, "datafusion.expr.CreateMemoryTable", false]], "createview (in module datafusion.expr)": [[4, "datafusion.expr.CreateView", false]], "csvreadoptions (class in datafusion)": [[7, "datafusion.CsvReadOptions", false]], "csvreadoptions (class in datafusion.options)": [[13, "datafusion.options.CsvReadOptions", false]], "ctx (datafusion.context.sessioncontext attribute)": [[1, "datafusion.context.SessionContext.ctx", false]], "cume_dist() (in module datafusion.functions)": [[5, "datafusion.functions.cume_dist", false]], "current_date() (in module datafusion.functions)": [[5, "datafusion.functions.current_date", false]], "current_time() (in module datafusion.functions)": [[5, "datafusion.functions.current_time", false]], 
"custom_css (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.custom_css", false]], "data_page_row_count_limit (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.data_page_row_count_limit", false]], "data_page_row_count_limit (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.data_page_row_count_limit", false]], "data_pagesize_limit (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.data_pagesize_limit", false]], "data_pagesize_limit (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.data_pagesize_limit", false]], "data_type_or_field_to_field() (in module datafusion.user_defined)": [[18, "datafusion.user_defined.data_type_or_field_to_field", false]], "data_types_or_fields_to_field_list() (in module datafusion.user_defined)": [[18, "datafusion.user_defined.data_types_or_fields_to_field_list", false]], "database (class in datafusion)": [[7, "datafusion.Database", false]], "database() (datafusion.catalog method)": [[7, "datafusion.Catalog.database", false]], "database() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.database", false]], "dataframe (class in datafusion.dataframe)": [[2, "datafusion.dataframe.DataFrame", false]], "dataframehtmlformatter (class in datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter", false]], "dataframewriteoptions (class in datafusion)": [[7, "datafusion.DataFrameWriteOptions", false]], "dataframewriteoptions (class in datafusion.dataframe)": [[2, "datafusion.dataframe.DataFrameWriteOptions", false]], "datafusion": [[7, "module-datafusion", false]], "datafusion.catalog": [[0, "module-datafusion.catalog", false]], "datafusion.context": [[1, "module-datafusion.context", false]], "datafusion.dataframe": [[2, 
"module-datafusion.dataframe", false]], "datafusion.dataframe_formatter": [[3, "module-datafusion.dataframe_formatter", false]], "datafusion.expr": [[4, "module-datafusion.expr", false]], "datafusion.functions": [[5, "module-datafusion.functions", false]], "datafusion.html_formatter": [[6, "module-datafusion.html_formatter", false]], "datafusion.input": [[9, "module-datafusion.input", false]], "datafusion.input.base": [[8, "module-datafusion.input.base", false]], "datafusion.input.location": [[10, "module-datafusion.input.location", false]], "datafusion.io": [[11, "module-datafusion.io", false]], "datafusion.object_store": [[12, "module-datafusion.object_store", false]], "datafusion.options": [[13, "module-datafusion.options", false]], "datafusion.plan": [[14, "module-datafusion.plan", false]], "datafusion.record_batch": [[15, "module-datafusion.record_batch", false]], "datafusion.substrait": [[16, "module-datafusion.substrait", false]], "datafusion.unparser": [[17, "module-datafusion.unparser", false]], "datafusion.user_defined": [[18, "module-datafusion.user_defined", false]], "date_bin() (in module datafusion.functions)": [[5, "datafusion.functions.date_bin", false]], "date_part() (in module datafusion.functions)": [[5, "datafusion.functions.date_part", false]], "date_trunc() (in module datafusion.functions)": [[5, "datafusion.functions.date_trunc", false]], "datepart() (in module datafusion.functions)": [[5, "datafusion.functions.datepart", false]], "datetrunc() (in module datafusion.functions)": [[5, "datafusion.functions.datetrunc", false]], "deallocate (in module datafusion.expr)": [[4, "datafusion.expr.Deallocate", false]], "decode() (in module datafusion.functions)": [[5, "datafusion.functions.decode", false]], "default() (datafusion.unparser.dialect static method)": [[17, "datafusion.unparser.Dialect.default", false]], "default_str_repr() (datafusion.dataframe.dataframe static method)": [[2, "datafusion.dataframe.DataFrame.default_str_repr", false]], 
"defaultstyleprovider (class in datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.DefaultStyleProvider", false]], "degrees() (datafusion.expr method)": [[7, "datafusion.Expr.degrees", false]], "degrees() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.degrees", false]], "degrees() (in module datafusion.functions)": [[5, "datafusion.functions.degrees", false]], "delimiter (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.delimiter", false]], "delimiter (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.delimiter", false]], "dense_rank() (in module datafusion.functions)": [[5, "datafusion.functions.dense_rank", false]], "deregister_schema() (datafusion.catalog method)": [[7, "datafusion.Catalog.deregister_schema", false]], "deregister_schema() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.deregister_schema", false]], "deregister_schema() (datafusion.catalog.catalogprovider method)": [[0, "datafusion.catalog.CatalogProvider.deregister_schema", false]], "deregister_table() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.deregister_table", false]], "deregister_table() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.deregister_table", false]], "deregister_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.deregister_table", false]], "describe() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.describe", false]], "describetable (in module datafusion.expr)": [[4, "datafusion.expr.DescribeTable", false]], "deserialize() (datafusion.substrait.serde static method)": [[16, "datafusion.substrait.Serde.deserialize", false]], "deserialize_bytes() (datafusion.substrait.serde static method)": [[16, "datafusion.substrait.Serde.deserialize_bytes", false]], "df (datafusion.dataframe.dataframe attribute)": [[2, 
"datafusion.dataframe.DataFrame.df", false]], "dfschema (in module datafusion)": [[7, "datafusion.DFSchema", false]], "dialect (class in datafusion.unparser)": [[17, "datafusion.unparser.Dialect", false]], "dialect (datafusion.unparser.dialect attribute)": [[17, "datafusion.unparser.Dialect.dialect", false]], "dictionary_enabled (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.dictionary_enabled", false]], "dictionary_enabled (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.dictionary_enabled", false]], "dictionary_enabled (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.dictionary_enabled", false]], "dictionary_enabled (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.dictionary_enabled", false]], "dictionary_page_size_limit (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.dictionary_page_size_limit", false]], "dictionary_page_size_limit (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.dictionary_page_size_limit", false]], "digest() (in module datafusion.functions)": [[5, "datafusion.functions.digest", false]], "display() (datafusion.executionplan method)": [[7, "datafusion.ExecutionPlan.display", false]], "display() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.display", false]], "display() (datafusion.plan.executionplan method)": [[14, "datafusion.plan.ExecutionPlan.display", false]], "display() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.display", false]], "display_graphviz() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.display_graphviz", false]], "display_graphviz() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.display_graphviz", false]], "display_indent() (datafusion.executionplan method)": [[7, 
"datafusion.ExecutionPlan.display_indent", false]], "display_indent() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.display_indent", false]], "display_indent() (datafusion.plan.executionplan method)": [[14, "datafusion.plan.ExecutionPlan.display_indent", false]], "display_indent() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.display_indent", false]], "display_indent_schema() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.display_indent_schema", false]], "display_indent_schema() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.display_indent_schema", false]], "display_name() (datafusion.expr method)": [[7, "datafusion.Expr.display_name", false]], "display_name() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.display_name", false]], "distinct (in module datafusion.expr)": [[4, "datafusion.expr.Distinct", false]], "distinct() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.distinct", false]], "distinct() (datafusion.expr method)": [[7, "datafusion.Expr.distinct", false]], "distinct() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.distinct", false]], "dmlstatement (in module datafusion.expr)": [[4, "datafusion.expr.DmlStatement", false]], "drop() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.drop", false]], "dropcatalogschema (in module datafusion.expr)": [[4, "datafusion.expr.DropCatalogSchema", false]], "dropfunction (in module datafusion.expr)": [[4, "datafusion.expr.DropFunction", false]], "droptable (in module datafusion.expr)": [[4, "datafusion.expr.DropTable", false]], "dropview (in module datafusion.expr)": [[4, "datafusion.expr.DropView", false]], "duckdb() (datafusion.unparser.dialect static method)": [[17, "datafusion.unparser.Dialect.duckdb", false]], "empty() (datafusion.expr method)": [[7, "datafusion.Expr.empty", false]], "empty() (datafusion.expr.expr method)": [[4, 
"datafusion.expr.Expr.empty", false]], "empty() (in module datafusion.functions)": [[5, "datafusion.functions.empty", false]], "empty_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.empty_table", false]], "emptyrelation (in module datafusion.expr)": [[4, "datafusion.expr.EmptyRelation", false]], "enable_cell_expansion (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.enable_cell_expansion", false]], "enable_url_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.enable_url_table", false]], "encode() (datafusion.substrait.plan method)": [[16, "datafusion.substrait.Plan.encode", false]], "encode() (in module datafusion.functions)": [[5, "datafusion.functions.encode", false]], "encoding (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.encoding", false]], "encoding (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.encoding", false]], "encoding (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.encoding", false]], "encoding (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.encoding", false]], "end() (datafusion.expr.casebuilder method)": [[4, "datafusion.expr.CaseBuilder.end", false]], "ends_with() (in module datafusion.functions)": [[5, "datafusion.functions.ends_with", false]], "ensure_expr() (in module datafusion.expr)": [[4, "datafusion.expr.ensure_expr", false]], "ensure_expr_list() (in module datafusion.expr)": [[4, "datafusion.expr.ensure_expr_list", false]], "escape (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.escape", false]], "escape (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.escape", false]], "evaluate() (datafusion.accumulator method)": [[7, 
"datafusion.Accumulator.evaluate", false]], "evaluate() (datafusion.user_defined.accumulator method)": [[18, "datafusion.user_defined.Accumulator.evaluate", false]], "evaluate() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.evaluate", false]], "evaluate_all() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.evaluate_all", false]], "evaluate_all_with_rank() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.evaluate_all_with_rank", false]], "except_all() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.except_all", false]], "execute (in module datafusion.expr)": [[4, "datafusion.expr.Execute", false]], "execute() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.execute", false]], "execute_stream() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.execute_stream", false]], "execute_stream_partitioned() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.execute_stream_partitioned", false]], "execution_plan() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.execution_plan", false]], "executionplan (class in datafusion)": [[7, "datafusion.ExecutionPlan", false]], "executionplan (class in datafusion.plan)": [[14, "datafusion.plan.ExecutionPlan", false]], "exists (in module datafusion.expr)": [[4, "datafusion.expr.Exists", false]], "exp() (datafusion.expr method)": [[7, "datafusion.Expr.exp", false]], "exp() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.exp", false]], "exp() (in module datafusion.functions)": [[5, "datafusion.functions.exp", false]], "explain (in module datafusion.expr)": [[4, "datafusion.expr.Explain", false]], "explain() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.explain", false]], "expr (class in 
datafusion)": [[7, "datafusion.Expr", false]], "expr (class in datafusion.expr)": [[4, "datafusion.expr.Expr", false]], "expr (datafusion.expr attribute)": [[7, "datafusion.Expr.expr", false]], "expr (datafusion.expr.expr attribute)": [[4, "datafusion.expr.Expr.expr", false]], "expr() (datafusion.expr.sortexpr method)": [[4, "datafusion.expr.SortExpr.expr", false]], "expr_type_error (in module datafusion.expr)": [[4, "datafusion.expr.EXPR_TYPE_ERROR", false]], "extension (in module datafusion.expr)": [[4, "datafusion.expr.Extension", false]], "extract() (in module datafusion.functions)": [[5, "datafusion.functions.extract", false]], "factorial() (datafusion.expr method)": [[7, "datafusion.Expr.factorial", false]], "factorial() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.factorial", false]], "factorial() (in module datafusion.functions)": [[5, "datafusion.functions.factorial", false]], "file_compression_type (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.file_compression_type", false]], "file_compression_type (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.file_compression_type", false]], "file_extension (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.file_extension", false]], "file_extension (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.file_extension", false]], "file_sort_order (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.file_sort_order", false]], "file_sort_order (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.file_sort_order", false]], "filetype (in module datafusion.expr)": [[4, "datafusion.expr.FileType", false]], "fill_nan() (datafusion.expr method)": [[7, "datafusion.Expr.fill_nan", false]], "fill_nan() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.fill_nan", false]], "fill_null() (datafusion.dataframe.dataframe method)": [[2, 
"datafusion.dataframe.DataFrame.fill_null", false]], "fill_null() (datafusion.expr method)": [[7, "datafusion.Expr.fill_null", false]], "fill_null() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.fill_null", false]], "filter (in module datafusion.expr)": [[4, "datafusion.expr.Filter", false]], "filter() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.filter", false]], "filter() (datafusion.expr method)": [[7, "datafusion.Expr.filter", false]], "filter() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.filter", false]], "find_in_set() (in module datafusion.functions)": [[5, "datafusion.functions.find_in_set", false]], "first_value() (in module datafusion.functions)": [[5, "datafusion.functions.first_value", false]], "flatten() (datafusion.expr method)": [[7, "datafusion.Expr.flatten", false]], "flatten() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.flatten", false]], "flatten() (in module datafusion.functions)": [[5, "datafusion.functions.flatten", false]], "floor() (datafusion.expr method)": [[7, "datafusion.Expr.floor", false]], "floor() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.floor", false]], "floor() (in module datafusion.functions)": [[5, "datafusion.functions.floor", false]], "format_html() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.format_html", false]], "format_str() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.format_str", false]], "formattermanager (class in datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.FormatterManager", false]], "frame_bound (datafusion.expr.windowframebound attribute)": [[4, "datafusion.expr.WindowFrameBound.frame_bound", false]], "from_arrow() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.from_arrow", false]], "from_arrow_table() 
(datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.from_arrow_table", false]], "from_dataset() (datafusion.catalog.table static method)": [[0, "datafusion.catalog.Table.from_dataset", false]], "from_dataset() (datafusion.table static method)": [[7, "datafusion.Table.from_dataset", false]], "from_json() (datafusion.substrait.plan static method)": [[16, "datafusion.substrait.Plan.from_json", false]], "from_pandas() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.from_pandas", false]], "from_polars() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.from_polars", false]], "from_proto() (datafusion.executionplan static method)": [[7, "datafusion.ExecutionPlan.from_proto", false]], "from_proto() (datafusion.logicalplan static method)": [[7, "datafusion.LogicalPlan.from_proto", false]], "from_proto() (datafusion.plan.executionplan static method)": [[14, "datafusion.plan.ExecutionPlan.from_proto", false]], "from_proto() (datafusion.plan.logicalplan static method)": [[14, "datafusion.plan.LogicalPlan.from_proto", false]], "from_pycapsule() (datafusion.aggregateudf static method)": [[7, "datafusion.AggregateUDF.from_pycapsule", false]], "from_pycapsule() (datafusion.scalarudf static method)": [[7, "datafusion.ScalarUDF.from_pycapsule", false]], "from_pycapsule() (datafusion.user_defined.aggregateudf static method)": [[18, "datafusion.user_defined.AggregateUDF.from_pycapsule", false]], "from_pycapsule() (datafusion.user_defined.scalarudf static method)": [[18, "datafusion.user_defined.ScalarUDF.from_pycapsule", false]], "from_pycapsule() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF.from_pycapsule", false]], "from_pycapsule() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF.from_pycapsule", false]], "from_pydict() (datafusion.context.sessioncontext method)": [[1, 
"datafusion.context.SessionContext.from_pydict", false]], "from_pylist() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.from_pylist", false]], "from_str() (datafusion.dataframe.compression class method)": [[2, "datafusion.dataframe.Compression.from_str", false]], "from_substrait_plan() (datafusion.substrait.consumer static method)": [[16, "datafusion.substrait.Consumer.from_substrait_plan", false]], "from_unixtime() (datafusion.expr method)": [[7, "datafusion.Expr.from_unixtime", false]], "from_unixtime() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.from_unixtime", false]], "from_unixtime() (in module datafusion.functions)": [[5, "datafusion.functions.from_unixtime", false]], "gcd() (in module datafusion.functions)": [[5, "datafusion.functions.gcd", false]], "get_cell_style() (datafusion.dataframe_formatter.defaultstyleprovider method)": [[3, "datafusion.dataframe_formatter.DefaultStyleProvider.get_cell_style", false]], "get_cell_style() (datafusion.dataframe_formatter.styleprovider method)": [[3, "datafusion.dataframe_formatter.StyleProvider.get_cell_style", false]], "get_default_level() (datafusion.dataframe.compression method)": [[2, "datafusion.dataframe.Compression.get_default_level", false]], "get_formatter() (datafusion.dataframe_formatter.formattermanager class method)": [[3, "datafusion.dataframe_formatter.FormatterManager.get_formatter", false]], "get_formatter() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.get_formatter", false]], "get_frame_units() (datafusion.expr.windowframe method)": [[4, "datafusion.expr.WindowFrame.get_frame_units", false]], "get_frame_units() (datafusion.windowframe method)": [[7, "datafusion.WindowFrame.get_frame_units", false]], "get_header_style() (datafusion.dataframe_formatter.defaultstyleprovider method)": [[3, "datafusion.dataframe_formatter.DefaultStyleProvider.get_header_style", false]], "get_header_style() 
(datafusion.dataframe_formatter.styleprovider method)": [[3, "datafusion.dataframe_formatter.StyleProvider.get_header_style", false]], "get_lower_bound() (datafusion.expr.windowframe method)": [[4, "datafusion.expr.WindowFrame.get_lower_bound", false]], "get_lower_bound() (datafusion.windowframe method)": [[7, "datafusion.WindowFrame.get_lower_bound", false]], "get_offset() (datafusion.expr.windowframebound method)": [[4, "datafusion.expr.WindowFrameBound.get_offset", false]], "get_range() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.get_range", false]], "get_upper_bound() (datafusion.expr.windowframe method)": [[4, "datafusion.expr.WindowFrame.get_upper_bound", false]], "get_upper_bound() (datafusion.windowframe method)": [[7, "datafusion.WindowFrame.get_upper_bound", false]], "global_ctx() (datafusion.context.sessioncontext class method)": [[1, "datafusion.context.SessionContext.global_ctx", false]], "googlecloud (in module datafusion.object_store)": [[12, "datafusion.object_store.GoogleCloud", false]], "groupingset (in module datafusion.expr)": [[4, "datafusion.expr.GroupingSet", false]], "gzip (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.GZIP", false]], "has_header (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.has_header", false]], "has_header (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.has_header", false]], "head() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.head", false]], "http (in module datafusion.object_store)": [[12, "datafusion.object_store.Http", false]], "ilike (in module datafusion.expr)": [[4, "datafusion.expr.ILike", false]], "immutable (datafusion.user_defined.volatility attribute)": [[18, "datafusion.user_defined.Volatility.Immutable", false]], "in_list() (in module datafusion.functions)": [[5, "datafusion.functions.in_list", false]], "include_rank() 
(datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.include_rank", false]], "initcap() (datafusion.expr method)": [[7, "datafusion.Expr.initcap", false]], "initcap() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.initcap", false]], "initcap() (in module datafusion.functions)": [[5, "datafusion.functions.initcap", false]], "inlist (in module datafusion.expr)": [[4, "datafusion.expr.InList", false]], "inputs() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.inputs", false]], "inputs() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.inputs", false]], "insertop (class in datafusion)": [[7, "datafusion.InsertOp", false]], "insertop (class in datafusion.dataframe)": [[2, "datafusion.dataframe.InsertOp", false]], "insubquery (in module datafusion.expr)": [[4, "datafusion.expr.InSubquery", false]], "intersect() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.intersect", false]], "into_view() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.into_view", false]], "is_causal() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.is_causal", false]], "is_correct_input() (datafusion.input.base.baseinputsource method)": [[8, "datafusion.input.base.BaseInputSource.is_correct_input", false]], "is_correct_input() (datafusion.input.location.locationinputplugin method)": [[10, "datafusion.input.location.LocationInputPlugin.is_correct_input", false]], "is_correct_input() (datafusion.input.locationinputplugin method)": [[9, "datafusion.input.LocationInputPlugin.is_correct_input", false]], "is_current_row() (datafusion.expr.windowframebound method)": [[4, "datafusion.expr.WindowFrameBound.is_current_row", false]], "is_following() (datafusion.expr.windowframebound method)": [[4, "datafusion.expr.WindowFrameBound.is_following", false]], "is_not_null() (datafusion.expr method)": [[7, 
"datafusion.Expr.is_not_null", false]], "is_not_null() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.is_not_null", false]], "is_null() (datafusion.expr method)": [[7, "datafusion.Expr.is_null", false]], "is_null() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.is_null", false]], "is_preceding() (datafusion.expr.windowframebound method)": [[4, "datafusion.expr.WindowFrameBound.is_preceding", false]], "is_unbounded() (datafusion.expr.windowframebound method)": [[4, "datafusion.expr.WindowFrameBound.is_unbounded", false]], "isfalse (in module datafusion.expr)": [[4, "datafusion.expr.IsFalse", false]], "isnan() (datafusion.expr method)": [[7, "datafusion.Expr.isnan", false]], "isnan() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.isnan", false]], "isnan() (in module datafusion.functions)": [[5, "datafusion.functions.isnan", false]], "isnotfalse (in module datafusion.expr)": [[4, "datafusion.expr.IsNotFalse", false]], "isnotnull (in module datafusion.expr)": [[4, "datafusion.expr.IsNotNull", false]], "isnottrue (in module datafusion.expr)": [[4, "datafusion.expr.IsNotTrue", false]], "isnotunknown (in module datafusion.expr)": [[4, "datafusion.expr.IsNotUnknown", false]], "isnull (in module datafusion.expr)": [[4, "datafusion.expr.IsNull", false]], "istrue (in module datafusion.expr)": [[4, "datafusion.expr.IsTrue", false]], "isunknown (in module datafusion.expr)": [[4, "datafusion.expr.IsUnknown", false]], "iszero() (datafusion.expr method)": [[7, "datafusion.Expr.iszero", false]], "iszero() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.iszero", false]], "iszero() (in module datafusion.functions)": [[5, "datafusion.functions.iszero", false]], "join (in module datafusion.expr)": [[4, "datafusion.expr.Join", false]], "join() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.join", false]], "join_on() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.join_on", 
false]], "joinconstraint (in module datafusion.expr)": [[4, "datafusion.expr.JoinConstraint", false]], "jointype (in module datafusion.expr)": [[4, "datafusion.expr.JoinType", false]], "kind (datafusion.catalog.table property)": [[0, "datafusion.catalog.Table.kind", false]], "kind (datafusion.table property)": [[7, "datafusion.Table.kind", false]], "lag() (in module datafusion.functions)": [[5, "datafusion.functions.lag", false]], "last_value() (in module datafusion.functions)": [[5, "datafusion.functions.last_value", false]], "lcm() (in module datafusion.functions)": [[5, "datafusion.functions.lcm", false]], "lead() (in module datafusion.functions)": [[5, "datafusion.functions.lead", false]], "left() (in module datafusion.functions)": [[5, "datafusion.functions.left", false]], "length() (datafusion.expr method)": [[7, "datafusion.Expr.length", false]], "length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.length", false]], "length() (in module datafusion.functions)": [[5, "datafusion.functions.length", false]], "levenshtein() (in module datafusion.functions)": [[5, "datafusion.functions.levenshtein", false]], "like (in module datafusion.expr)": [[4, "datafusion.expr.Like", false]], "limit (in module datafusion.expr)": [[4, "datafusion.expr.Limit", false]], "limit() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.limit", false]], "list_append() (in module datafusion.functions)": [[5, "datafusion.functions.list_append", false]], "list_cat() (in module datafusion.functions)": [[5, "datafusion.functions.list_cat", false]], "list_concat() (in module datafusion.functions)": [[5, "datafusion.functions.list_concat", false]], "list_dims() (datafusion.expr method)": [[7, "datafusion.Expr.list_dims", false]], "list_dims() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.list_dims", false]], "list_dims() (in module datafusion.functions)": [[5, "datafusion.functions.list_dims", false]], "list_distinct() (datafusion.expr 
method)": [[7, "datafusion.Expr.list_distinct", false]], "list_distinct() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.list_distinct", false]], "list_distinct() (in module datafusion.functions)": [[5, "datafusion.functions.list_distinct", false]], "list_element() (in module datafusion.functions)": [[5, "datafusion.functions.list_element", false]], "list_except() (in module datafusion.functions)": [[5, "datafusion.functions.list_except", false]], "list_extract() (in module datafusion.functions)": [[5, "datafusion.functions.list_extract", false]], "list_indexof() (in module datafusion.functions)": [[5, "datafusion.functions.list_indexof", false]], "list_intersect() (in module datafusion.functions)": [[5, "datafusion.functions.list_intersect", false]], "list_join() (in module datafusion.functions)": [[5, "datafusion.functions.list_join", false]], "list_length() (datafusion.expr method)": [[7, "datafusion.Expr.list_length", false]], "list_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.list_length", false]], "list_length() (in module datafusion.functions)": [[5, "datafusion.functions.list_length", false]], "list_ndims() (datafusion.expr method)": [[7, "datafusion.Expr.list_ndims", false]], "list_ndims() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.list_ndims", false]], "list_ndims() (in module datafusion.functions)": [[5, "datafusion.functions.list_ndims", false]], "list_position() (in module datafusion.functions)": [[5, "datafusion.functions.list_position", false]], "list_positions() (in module datafusion.functions)": [[5, "datafusion.functions.list_positions", false]], "list_prepend() (in module datafusion.functions)": [[5, "datafusion.functions.list_prepend", false]], "list_push_back() (in module datafusion.functions)": [[5, "datafusion.functions.list_push_back", false]], "list_push_front() (in module datafusion.functions)": [[5, "datafusion.functions.list_push_front", false]], "list_remove() (in module 
datafusion.functions)": [[5, "datafusion.functions.list_remove", false]], "list_remove_all() (in module datafusion.functions)": [[5, "datafusion.functions.list_remove_all", false]], "list_remove_n() (in module datafusion.functions)": [[5, "datafusion.functions.list_remove_n", false]], "list_repeat() (in module datafusion.functions)": [[5, "datafusion.functions.list_repeat", false]], "list_replace() (in module datafusion.functions)": [[5, "datafusion.functions.list_replace", false]], "list_replace_all() (in module datafusion.functions)": [[5, "datafusion.functions.list_replace_all", false]], "list_replace_n() (in module datafusion.functions)": [[5, "datafusion.functions.list_replace_n", false]], "list_resize() (in module datafusion.functions)": [[5, "datafusion.functions.list_resize", false]], "list_slice() (in module datafusion.functions)": [[5, "datafusion.functions.list_slice", false]], "list_sort() (in module datafusion.functions)": [[5, "datafusion.functions.list_sort", false]], "list_to_string() (in module datafusion.functions)": [[5, "datafusion.functions.list_to_string", false]], "list_union() (in module datafusion.functions)": [[5, "datafusion.functions.list_union", false]], "lit() (in module datafusion)": [[7, "datafusion.lit", false]], "literal (in module datafusion.expr)": [[4, "datafusion.expr.Literal", false]], "literal() (datafusion.expr static method)": [[7, "datafusion.Expr.literal", false]], "literal() (datafusion.expr.expr static method)": [[4, "datafusion.expr.Expr.literal", false]], "literal() (in module datafusion)": [[7, "datafusion.literal", false]], "literal_with_metadata() (datafusion.expr static method)": [[7, "datafusion.Expr.literal_with_metadata", false]], "literal_with_metadata() (datafusion.expr.expr static method)": [[4, "datafusion.expr.Expr.literal_with_metadata", false]], "ln() (datafusion.expr method)": [[7, "datafusion.Expr.ln", false]], "ln() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.ln", false]], "ln() (in 
module datafusion.functions)": [[5, "datafusion.functions.ln", false]], "localfilesystem (in module datafusion.object_store)": [[12, "datafusion.object_store.LocalFileSystem", false]], "locationinputplugin (class in datafusion.input)": [[9, "datafusion.input.LocationInputPlugin", false]], "locationinputplugin (class in datafusion.input.location)": [[10, "datafusion.input.location.LocationInputPlugin", false]], "log() (in module datafusion.functions)": [[5, "datafusion.functions.log", false]], "log10() (datafusion.expr method)": [[7, "datafusion.Expr.log10", false]], "log10() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.log10", false]], "log10() (in module datafusion.functions)": [[5, "datafusion.functions.log10", false]], "log2() (datafusion.expr method)": [[7, "datafusion.Expr.log2", false]], "log2() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.log2", false]], "log2() (in module datafusion.functions)": [[5, "datafusion.functions.log2", false]], "logical_plan() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.logical_plan", false]], "logicalplan (class in datafusion)": [[7, "datafusion.LogicalPlan", false]], "logicalplan (class in datafusion.plan)": [[14, "datafusion.plan.LogicalPlan", false]], "lower() (datafusion.expr method)": [[7, "datafusion.Expr.lower", false]], "lower() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.lower", false]], "lower() (in module datafusion.functions)": [[5, "datafusion.functions.lower", false]], "lpad() (in module datafusion.functions)": [[5, "datafusion.functions.lpad", false]], "ltrim() (datafusion.expr method)": [[7, "datafusion.Expr.ltrim", false]], "ltrim() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.ltrim", false]], "ltrim() (in module datafusion.functions)": [[5, "datafusion.functions.ltrim", false]], "lz4 (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.LZ4", false]], "lz4_raw 
(datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.LZ4_RAW", false]], "make_array() (in module datafusion.functions)": [[5, "datafusion.functions.make_array", false]], "make_date() (in module datafusion.functions)": [[5, "datafusion.functions.make_date", false]], "make_list() (in module datafusion.functions)": [[5, "datafusion.functions.make_list", false]], "max() (in module datafusion.functions)": [[5, "datafusion.functions.max", false]], "max_cell_length (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.max_cell_length", false]], "max_height (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.max_height", false]], "max_memory_bytes (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.max_memory_bytes", false]], "max_row_group_size (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.max_row_group_size", false]], "max_row_group_size (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.max_row_group_size", false]], "max_rows (datafusion.dataframe_formatter.dataframehtmlformatter property)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.max_rows", false]], "max_width (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.max_width", false]], "maximum_buffered_record_batches_per_stream (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.maximum_buffered_record_batches_per_stream", false]], "maximum_buffered_record_batches_per_stream (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.maximum_buffered_record_batches_per_stream", false]], 
"maximum_parallel_row_group_writers (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.maximum_parallel_row_group_writers", false]], "maximum_parallel_row_group_writers (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.maximum_parallel_row_group_writers", false]], "md5() (datafusion.expr method)": [[7, "datafusion.Expr.md5", false]], "md5() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.md5", false]], "md5() (in module datafusion.functions)": [[5, "datafusion.functions.md5", false]], "mean() (in module datafusion.functions)": [[5, "datafusion.functions.mean", false]], "median() (in module datafusion.functions)": [[5, "datafusion.functions.median", false]], "memoize() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.memoize", false]], "memory_catalog() (datafusion.catalog static method)": [[7, "datafusion.Catalog.memory_catalog", false]], "memory_catalog() (datafusion.catalog.catalog static method)": [[0, "datafusion.catalog.Catalog.memory_catalog", false]], "memory_catalog() (datafusion.catalog.cataloglist static method)": [[0, "datafusion.catalog.CatalogList.memory_catalog", false]], "memory_schema() (datafusion.catalog.schema static method)": [[0, "datafusion.catalog.Schema.memory_schema", false]], "merge() (datafusion.accumulator method)": [[7, "datafusion.Accumulator.merge", false]], "merge() (datafusion.user_defined.accumulator method)": [[18, "datafusion.user_defined.Accumulator.merge", false]], "microsoftazure (in module datafusion.object_store)": [[12, "datafusion.object_store.MicrosoftAzure", false]], "min() (in module datafusion.functions)": [[5, "datafusion.functions.min", false]], "min_rows (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.min_rows", false]], "module": [[0, "module-datafusion.catalog", false], [1, 
"module-datafusion.context", false], [2, "module-datafusion.dataframe", false], [3, "module-datafusion.dataframe_formatter", false], [4, "module-datafusion.expr", false], [5, "module-datafusion.functions", false], [6, "module-datafusion.html_formatter", false], [7, "module-datafusion", false], [8, "module-datafusion.input.base", false], [9, "module-datafusion.input", false], [10, "module-datafusion.input.location", false], [11, "module-datafusion.io", false], [12, "module-datafusion.object_store", false], [13, "module-datafusion.options", false], [14, "module-datafusion.plan", false], [15, "module-datafusion.record_batch", false], [16, "module-datafusion.substrait", false], [17, "module-datafusion.unparser", false], [18, "module-datafusion.user_defined", false]], "mysql() (datafusion.unparser.dialect static method)": [[17, "datafusion.unparser.Dialect.mysql", false]], "named_struct() (in module datafusion.functions)": [[5, "datafusion.functions.named_struct", false]], "names() (datafusion.catalog method)": [[7, "datafusion.Catalog.names", false]], "names() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.names", false]], "names() (datafusion.catalog.cataloglist method)": [[0, "datafusion.catalog.CatalogList.names", false]], "names() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.names", false]], "nanvl() (in module datafusion.functions)": [[5, "datafusion.functions.nanvl", false]], "negative (in module datafusion.expr)": [[4, "datafusion.expr.Negative", false]], "newlines_in_values (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.newlines_in_values", false]], "newlines_in_values (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.newlines_in_values", false]], "next() (datafusion.record_batch.recordbatchstream method)": [[15, "datafusion.record_batch.RecordBatchStream.next", false]], "next() (datafusion.recordbatchstream method)": [[7, 
"datafusion.RecordBatchStream.next", false]], "not (in module datafusion.expr)": [[4, "datafusion.expr.Not", false]], "now() (in module datafusion.functions)": [[5, "datafusion.functions.now", false]], "nth_value() (in module datafusion.functions)": [[5, "datafusion.functions.nth_value", false]], "ntile() (in module datafusion.functions)": [[5, "datafusion.functions.ntile", false]], "null_regex (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.null_regex", false]], "null_regex (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.null_regex", false]], "null_treatment() (datafusion.expr method)": [[7, "datafusion.Expr.null_treatment", false]], "null_treatment() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.null_treatment", false]], "nullif() (in module datafusion.functions)": [[5, "datafusion.functions.nullif", false]], "nulls_first() (datafusion.expr.sortexpr method)": [[4, "datafusion.expr.SortExpr.nulls_first", false]], "nvl() (in module datafusion.functions)": [[5, "datafusion.functions.nvl", false]], "octet_length() (datafusion.expr method)": [[7, "datafusion.Expr.octet_length", false]], "octet_length() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.octet_length", false]], "octet_length() (in module datafusion.functions)": [[5, "datafusion.functions.octet_length", false]], "operatefunctionarg (in module datafusion.expr)": [[4, "datafusion.expr.OperateFunctionArg", false]], "optimized_logical_plan() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.optimized_logical_plan", false]], "options_internal (datafusion.context.sqloptions attribute)": [[1, "datafusion.context.SQLOptions.options_internal", false]], "options_internal (datafusion.sqloptions attribute)": [[7, "datafusion.SQLOptions.options_internal", false]], "order_by() (datafusion.expr method)": [[7, "datafusion.Expr.order_by", false]], "order_by() (datafusion.expr.expr method)": [[4, 
"datafusion.expr.Expr.order_by", false]], "order_by() (in module datafusion.functions)": [[5, "datafusion.functions.order_by", false]], "otherwise() (datafusion.expr.casebuilder method)": [[4, "datafusion.expr.CaseBuilder.otherwise", false]], "over() (datafusion.expr method)": [[7, "datafusion.Expr.over", false]], "over() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.over", false]], "overlay() (in module datafusion.functions)": [[5, "datafusion.functions.overlay", false]], "overwrite (datafusion.dataframe.insertop attribute)": [[2, "datafusion.dataframe.InsertOp.OVERWRITE", false]], "overwrite (datafusion.insertop attribute)": [[7, "datafusion.InsertOp.OVERWRITE", false]], "owner_name() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.owner_name", false]], "parquetcolumnoptions (class in datafusion)": [[7, "datafusion.ParquetColumnOptions", false]], "parquetcolumnoptions (class in datafusion.dataframe)": [[2, "datafusion.dataframe.ParquetColumnOptions", false]], "parquetwriteroptions (class in datafusion)": [[7, "datafusion.ParquetWriterOptions", false]], "parquetwriteroptions (class in datafusion.dataframe)": [[2, "datafusion.dataframe.ParquetWriterOptions", false]], "parse_sql_expr() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.parse_sql_expr", false]], "partition_by() (datafusion.expr method)": [[7, "datafusion.Expr.partition_by", false]], "partition_by() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.partition_by", false]], "partition_count (datafusion.executionplan property)": [[7, "datafusion.ExecutionPlan.partition_count", false]], "partition_count (datafusion.plan.executionplan property)": [[14, "datafusion.plan.ExecutionPlan.partition_count", false]], "partitioning (in module datafusion.expr)": [[4, "datafusion.expr.Partitioning", false]], "percent_rank() (in module datafusion.functions)": [[5, "datafusion.functions.percent_rank", false]], "pi() (in module 
datafusion.functions)": [[5, "datafusion.functions.pi", false]], "placeholder (in module datafusion.expr)": [[4, "datafusion.expr.Placeholder", false]], "plan (class in datafusion.substrait)": [[16, "datafusion.substrait.Plan", false]], "plan_internal (datafusion.substrait.plan attribute)": [[16, "datafusion.substrait.Plan.plan_internal", false]], "plan_to_sql() (datafusion.unparser.unparser method)": [[17, "datafusion.unparser.Unparser.plan_to_sql", false]], "postgres() (datafusion.unparser.dialect static method)": [[17, "datafusion.unparser.Dialect.postgres", false]], "pow() (in module datafusion.functions)": [[5, "datafusion.functions.pow", false]], "power() (in module datafusion.functions)": [[5, "datafusion.functions.power", false]], "prepare (in module datafusion.expr)": [[4, "datafusion.expr.Prepare", false]], "producer (class in datafusion.substrait)": [[16, "datafusion.substrait.Producer", false]], "projection (in module datafusion.expr)": [[4, "datafusion.expr.Projection", false]], "python_value() (datafusion.expr method)": [[7, "datafusion.Expr.python_value", false]], "python_value() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.python_value", false]], "quote (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.quote", false]], "quote (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.quote", false]], "radians() (datafusion.expr method)": [[7, "datafusion.Expr.radians", false]], "radians() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.radians", false]], "radians() (in module datafusion.functions)": [[5, "datafusion.functions.radians", false]], "random() (in module datafusion.functions)": [[5, "datafusion.functions.random", false]], "range() (in module datafusion.functions)": [[5, "datafusion.functions.range", false]], "rank() (in module datafusion.functions)": [[5, "datafusion.functions.rank", false]], "raw_sort (datafusion.expr.sortexpr attribute)": [[4, 
"datafusion.expr.SortExpr.raw_sort", false]], "rbs (datafusion.record_batch.recordbatchstream attribute)": [[15, "datafusion.record_batch.RecordBatchStream.rbs", false]], "rbs (datafusion.recordbatchstream attribute)": [[7, "datafusion.RecordBatchStream.rbs", false]], "read_avro() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.read_avro", false]], "read_avro() (in module datafusion)": [[7, "datafusion.read_avro", false]], "read_avro() (in module datafusion.io)": [[11, "datafusion.io.read_avro", false]], "read_csv() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.read_csv", false]], "read_csv() (in module datafusion)": [[7, "datafusion.read_csv", false]], "read_csv() (in module datafusion.io)": [[11, "datafusion.io.read_csv", false]], "read_json() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.read_json", false]], "read_json() (in module datafusion)": [[7, "datafusion.read_json", false]], "read_json() (in module datafusion.io)": [[11, "datafusion.io.read_json", false]], "read_parquet() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.read_parquet", false]], "read_parquet() (in module datafusion)": [[7, "datafusion.read_parquet", false]], "read_parquet() (in module datafusion.io)": [[11, "datafusion.io.read_parquet", false]], "read_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.read_table", false]], "record_batch (datafusion.record_batch.recordbatch attribute)": [[15, "datafusion.record_batch.RecordBatch.record_batch", false]], "record_batch (datafusion.recordbatch attribute)": [[7, "datafusion.RecordBatch.record_batch", false]], "recordbatch (class in datafusion)": [[7, "datafusion.RecordBatch", false]], "recordbatch (class in datafusion.record_batch)": [[15, "datafusion.record_batch.RecordBatch", false]], "recordbatchstream (class in datafusion)": [[7, 
"datafusion.RecordBatchStream", false]], "recordbatchstream (class in datafusion.record_batch)": [[15, "datafusion.record_batch.RecordBatchStream", false]], "recursivequery (in module datafusion.expr)": [[4, "datafusion.expr.RecursiveQuery", false]], "regexp_count() (in module datafusion.functions)": [[5, "datafusion.functions.regexp_count", false]], "regexp_instr() (in module datafusion.functions)": [[5, "datafusion.functions.regexp_instr", false]], "regexp_like() (in module datafusion.functions)": [[5, "datafusion.functions.regexp_like", false]], "regexp_match() (in module datafusion.functions)": [[5, "datafusion.functions.regexp_match", false]], "regexp_replace() (in module datafusion.functions)": [[5, "datafusion.functions.regexp_replace", false]], "register_avro() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_avro", false]], "register_catalog() (datafusion.catalog.cataloglist method)": [[0, "datafusion.catalog.CatalogList.register_catalog", false]], "register_catalog() (datafusion.catalog.catalogproviderlist method)": [[0, "datafusion.catalog.CatalogProviderList.register_catalog", false]], "register_catalog_provider() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_catalog_provider", false]], "register_catalog_provider_list() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_catalog_provider_list", false]], "register_csv() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_csv", false]], "register_dataset() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_dataset", false]], "register_formatter() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.register_formatter", false]], "register_json() (datafusion.context.sessioncontext method)": [[1, 
"datafusion.context.SessionContext.register_json", false]], "register_listing_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_listing_table", false]], "register_object_store() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_object_store", false]], "register_parquet() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_parquet", false]], "register_record_batches() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_record_batches", false]], "register_schema() (datafusion.catalog method)": [[7, "datafusion.Catalog.register_schema", false]], "register_schema() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.register_schema", false]], "register_schema() (datafusion.catalog.catalogprovider method)": [[0, "datafusion.catalog.CatalogProvider.register_schema", false]], "register_table() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.register_table", false]], "register_table() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.register_table", false]], "register_table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_table", false]], "register_table_provider() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_table_provider", false]], "register_udaf() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_udaf", false]], "register_udf() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_udf", false]], "register_udtf() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_udtf", false]], "register_udwf() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_udwf", 
false]], "register_view() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.register_view", false]], "regr_avgx() (in module datafusion.functions)": [[5, "datafusion.functions.regr_avgx", false]], "regr_avgy() (in module datafusion.functions)": [[5, "datafusion.functions.regr_avgy", false]], "regr_count() (in module datafusion.functions)": [[5, "datafusion.functions.regr_count", false]], "regr_intercept() (in module datafusion.functions)": [[5, "datafusion.functions.regr_intercept", false]], "regr_r2() (in module datafusion.functions)": [[5, "datafusion.functions.regr_r2", false]], "regr_slope() (in module datafusion.functions)": [[5, "datafusion.functions.regr_slope", false]], "regr_sxx() (in module datafusion.functions)": [[5, "datafusion.functions.regr_sxx", false]], "regr_sxy() (in module datafusion.functions)": [[5, "datafusion.functions.regr_sxy", false]], "regr_syy() (in module datafusion.functions)": [[5, "datafusion.functions.regr_syy", false]], "repartition (in module datafusion.expr)": [[4, "datafusion.expr.Repartition", false]], "repartition() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.repartition", false]], "repartition_by_hash() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.repartition_by_hash", false]], "repeat() (in module datafusion.functions)": [[5, "datafusion.functions.repeat", false]], "replace (datafusion.dataframe.insertop attribute)": [[2, "datafusion.dataframe.InsertOp.REPLACE", false]], "replace (datafusion.insertop attribute)": [[7, "datafusion.InsertOp.REPLACE", false]], "replace() (in module datafusion.functions)": [[5, "datafusion.functions.replace", false]], "repr_rows (datafusion.dataframe_formatter.dataframehtmlformatter property)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.repr_rows", false]], "reset_formatter() (in module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.reset_formatter", 
false]], "reverse() (datafusion.expr method)": [[7, "datafusion.Expr.reverse", false]], "reverse() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.reverse", false]], "reverse() (in module datafusion.functions)": [[5, "datafusion.functions.reverse", false]], "rex_call_operands() (datafusion.expr method)": [[7, "datafusion.Expr.rex_call_operands", false]], "rex_call_operands() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.rex_call_operands", false]], "rex_call_operator() (datafusion.expr method)": [[7, "datafusion.Expr.rex_call_operator", false]], "rex_call_operator() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.rex_call_operator", false]], "rex_type() (datafusion.expr method)": [[7, "datafusion.Expr.rex_type", false]], "rex_type() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.rex_type", false]], "right() (in module datafusion.functions)": [[5, "datafusion.functions.right", false]], "round() (in module datafusion.functions)": [[5, "datafusion.functions.round", false]], "row_number() (in module datafusion.functions)": [[5, "datafusion.functions.row_number", false]], "rpad() (in module datafusion.functions)": [[5, "datafusion.functions.rpad", false]], "rtrim() (datafusion.expr method)": [[7, "datafusion.Expr.rtrim", false]], "rtrim() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.rtrim", false]], "rtrim() (in module datafusion.functions)": [[5, "datafusion.functions.rtrim", false]], "runtimeconfig (class in datafusion.context)": [[1, "datafusion.context.RuntimeConfig", false]], "runtimeenvbuilder (class in datafusion)": [[7, "datafusion.RuntimeEnvBuilder", false]], "runtimeenvbuilder (class in datafusion.context)": [[1, "datafusion.context.RuntimeEnvBuilder", false]], "scalarsubquery (in module datafusion.expr)": [[4, "datafusion.expr.ScalarSubquery", false]], "scalarudf (class in datafusion)": [[7, "datafusion.ScalarUDF", false]], "scalarudf (class in datafusion.user_defined)": [[18, 
"datafusion.user_defined.ScalarUDF", false]], "scalarudfexportable (class in datafusion.user_defined)": [[18, "datafusion.user_defined.ScalarUDFExportable", false]], "scalarvariable (in module datafusion.expr)": [[4, "datafusion.expr.ScalarVariable", false]], "schema (class in datafusion.catalog)": [[0, "datafusion.catalog.Schema", false]], "schema (datafusion.catalog.table property)": [[0, "datafusion.catalog.Table.schema", false]], "schema (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.schema", false]], "schema (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.schema", false]], "schema (datafusion.table property)": [[7, "datafusion.Table.schema", false]], "schema() (datafusion.catalog method)": [[7, "datafusion.Catalog.schema", false]], "schema() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.schema", false]], "schema() (datafusion.catalog.catalogprovider method)": [[0, "datafusion.catalog.CatalogProvider.schema", false]], "schema() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.schema", false]], "schema_infer_max_records (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.schema_infer_max_records", false]], "schema_infer_max_records (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.schema_infer_max_records", false]], "schema_name() (datafusion.expr method)": [[7, "datafusion.Expr.schema_name", false]], "schema_name() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.schema_name", false]], "schema_names() (datafusion.catalog method)": [[7, "datafusion.Catalog.schema_names", false]], "schema_names() (datafusion.catalog.catalog method)": [[0, "datafusion.catalog.Catalog.schema_names", false]], "schema_names() (datafusion.catalog.catalogprovider method)": [[0, "datafusion.catalog.CatalogProvider.schema_names", false]], "schemaprovider (class in datafusion.catalog)": [[0, 
"datafusion.catalog.SchemaProvider", false]], "select() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.select", false]], "select_columns() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.select_columns", false]], "select_exprs() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.select_exprs", false]], "serde (class in datafusion.substrait)": [[16, "datafusion.substrait.Serde", false]], "serialize() (datafusion.substrait.serde static method)": [[16, "datafusion.substrait.Serde.serialize", false]], "serialize_bytes() (datafusion.substrait.serde static method)": [[16, "datafusion.substrait.Serde.serialize_bytes", false]], "serialize_to_plan() (datafusion.substrait.serde static method)": [[16, "datafusion.substrait.Serde.serialize_to_plan", false]], "session_id() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.session_id", false]], "sessionconfig (class in datafusion)": [[7, "datafusion.SessionConfig", false]], "sessionconfig (class in datafusion.context)": [[1, "datafusion.context.SessionConfig", false]], "sessioncontext (class in datafusion.context)": [[1, "datafusion.context.SessionContext", false]], "set() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.set", false]], "set() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.set", false]], "set_custom_cell_builder() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.set_custom_cell_builder", false]], "set_custom_header_builder() (datafusion.dataframe_formatter.dataframehtmlformatter method)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.set_custom_header_builder", false]], "set_formatter() (datafusion.dataframe_formatter.formattermanager class method)": [[3, "datafusion.dataframe_formatter.FormatterManager.set_formatter", false]], "set_formatter() (in 
module datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.set_formatter", false]], "setvariable (in module datafusion.expr)": [[4, "datafusion.expr.SetVariable", false]], "sha224() (datafusion.expr method)": [[7, "datafusion.Expr.sha224", false]], "sha224() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sha224", false]], "sha224() (in module datafusion.functions)": [[5, "datafusion.functions.sha224", false]], "sha256() (datafusion.expr method)": [[7, "datafusion.Expr.sha256", false]], "sha256() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sha256", false]], "sha256() (in module datafusion.functions)": [[5, "datafusion.functions.sha256", false]], "sha384() (datafusion.expr method)": [[7, "datafusion.Expr.sha384", false]], "sha384() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sha384", false]], "sha384() (in module datafusion.functions)": [[5, "datafusion.functions.sha384", false]], "sha512() (datafusion.expr method)": [[7, "datafusion.Expr.sha512", false]], "sha512() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sha512", false]], "sha512() (in module datafusion.functions)": [[5, "datafusion.functions.sha512", false]], "show() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.show", false]], "show_truncation_message (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.show_truncation_message", false]], "signum() (datafusion.expr method)": [[7, "datafusion.Expr.signum", false]], "signum() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.signum", false]], "signum() (in module datafusion.functions)": [[5, "datafusion.functions.signum", false]], "similarto (in module datafusion.expr)": [[4, "datafusion.expr.SimilarTo", false]], "sin() (datafusion.expr method)": [[7, "datafusion.Expr.sin", false]], "sin() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sin", false]], "sin() (in 
module datafusion.functions)": [[5, "datafusion.functions.sin", false]], "sinh() (datafusion.expr method)": [[7, "datafusion.Expr.sinh", false]], "sinh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sinh", false]], "sinh() (in module datafusion.functions)": [[5, "datafusion.functions.sinh", false]], "skip_arrow_metadata (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.skip_arrow_metadata", false]], "skip_arrow_metadata (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.skip_arrow_metadata", false]], "snappy (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.SNAPPY", false]], "sort (in module datafusion.expr)": [[4, "datafusion.expr.Sort", false]], "sort() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.sort", false]], "sort() (datafusion.expr method)": [[7, "datafusion.Expr.sort", false]], "sort() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sort", false]], "sortexpr (class in datafusion.expr)": [[4, "datafusion.expr.SortExpr", false]], "sortkey (in module datafusion.expr)": [[4, "datafusion.expr.SortKey", false]], "split_part() (in module datafusion.functions)": [[5, "datafusion.functions.split_part", false]], "sql() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.sql", false]], "sql_with_options() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.sql_with_options", false]], "sqlite() (datafusion.unparser.dialect static method)": [[17, "datafusion.unparser.Dialect.sqlite", false]], "sqloptions (class in datafusion)": [[7, "datafusion.SQLOptions", false]], "sqloptions (class in datafusion.context)": [[1, "datafusion.context.SQLOptions", false]], "sqrt() (datafusion.expr method)": [[7, "datafusion.Expr.sqrt", false]], "sqrt() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.sqrt", false]], "sqrt() (in 
module datafusion.functions)": [[5, "datafusion.functions.sqrt", false]], "stable (datafusion.user_defined.volatility attribute)": [[18, "datafusion.user_defined.Volatility.Stable", false]], "starts_with() (in module datafusion.functions)": [[5, "datafusion.functions.starts_with", false]], "state() (datafusion.accumulator method)": [[7, "datafusion.Accumulator.state", false]], "state() (datafusion.user_defined.accumulator method)": [[18, "datafusion.user_defined.Accumulator.state", false]], "statistics_enabled (datafusion.dataframe.parquetcolumnoptions attribute)": [[2, "datafusion.dataframe.ParquetColumnOptions.statistics_enabled", false]], "statistics_enabled (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.statistics_enabled", false]], "statistics_enabled (datafusion.parquetcolumnoptions attribute)": [[7, "datafusion.ParquetColumnOptions.statistics_enabled", false]], "statistics_enabled (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.statistics_enabled", false]], "statistics_truncate_length (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.statistics_truncate_length", false]], "statistics_truncate_length (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.statistics_truncate_length", false]], "stddev() (in module datafusion.functions)": [[5, "datafusion.functions.stddev", false]], "stddev_pop() (in module datafusion.functions)": [[5, "datafusion.functions.stddev_pop", false]], "stddev_samp() (in module datafusion.functions)": [[5, "datafusion.functions.stddev_samp", false]], "string_agg() (in module datafusion.functions)": [[5, "datafusion.functions.string_agg", false]], "string_literal() (datafusion.expr static method)": [[7, "datafusion.Expr.string_literal", false]], "string_literal() (datafusion.expr.expr static method)": [[4, "datafusion.expr.Expr.string_literal", false]], "strpos() 
(in module datafusion.functions)": [[5, "datafusion.functions.strpos", false]], "struct() (in module datafusion.functions)": [[5, "datafusion.functions.struct", false]], "style_provider (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.style_provider", false]], "styleprovider (class in datafusion.dataframe_formatter)": [[3, "datafusion.dataframe_formatter.StyleProvider", false]], "subquery (in module datafusion.expr)": [[4, "datafusion.expr.Subquery", false]], "subqueryalias (in module datafusion.expr)": [[4, "datafusion.expr.SubqueryAlias", false]], "substr() (in module datafusion.functions)": [[5, "datafusion.functions.substr", false]], "substr_index() (in module datafusion.functions)": [[5, "datafusion.functions.substr_index", false]], "substring() (in module datafusion.functions)": [[5, "datafusion.functions.substring", false]], "sum() (in module datafusion.functions)": [[5, "datafusion.functions.sum", false]], "supports_bounded_execution() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.supports_bounded_execution", false]], "table (class in datafusion)": [[7, "datafusion.Table", false]], "table (class in datafusion.catalog)": [[0, "datafusion.catalog.Table", false]], "table() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.table", false]], "table() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.table", false]], "table() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.table", false]], "table_exist() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.table_exist", false]], "table_exist() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.table_exist", false]], "table_exist() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.table_exist", false]], 
"table_names() (datafusion.catalog.schema method)": [[0, "datafusion.catalog.Schema.table_names", false]], "table_names() (datafusion.catalog.schemaprovider method)": [[0, "datafusion.catalog.SchemaProvider.table_names", false]], "table_partition_cols (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.table_partition_cols", false]], "table_partition_cols (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.table_partition_cols", false]], "tablefunction (class in datafusion)": [[7, "datafusion.TableFunction", false]], "tablefunction (class in datafusion.user_defined)": [[18, "datafusion.user_defined.TableFunction", false]], "tableproviderexportable (class in datafusion.context)": [[1, "datafusion.context.TableProviderExportable", false]], "tablescan (in module datafusion.expr)": [[4, "datafusion.expr.TableScan", false]], "tail() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.tail", false]], "tan() (datafusion.expr method)": [[7, "datafusion.Expr.tan", false]], "tan() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.tan", false]], "tan() (in module datafusion.functions)": [[5, "datafusion.functions.tan", false]], "tanh() (datafusion.expr method)": [[7, "datafusion.Expr.tanh", false]], "tanh() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.tanh", false]], "tanh() (in module datafusion.functions)": [[5, "datafusion.functions.tanh", false]], "terminator (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.terminator", false]], "terminator (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.terminator", false]], "to_arrow_table() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.to_arrow_table", false]], "to_hex() (datafusion.expr method)": [[7, "datafusion.Expr.to_hex", false]], "to_hex() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.to_hex", false]], "to_hex() (in 
module datafusion.functions)": [[5, "datafusion.functions.to_hex", false]], "to_inner() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.to_inner", false]], "to_inner() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.to_inner", false]], "to_json() (datafusion.substrait.plan method)": [[16, "datafusion.substrait.Plan.to_json", false]], "to_pandas() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.to_pandas", false]], "to_polars() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.to_polars", false]], "to_proto() (datafusion.executionplan method)": [[7, "datafusion.ExecutionPlan.to_proto", false]], "to_proto() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.to_proto", false]], "to_proto() (datafusion.plan.executionplan method)": [[14, "datafusion.plan.ExecutionPlan.to_proto", false]], "to_proto() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.to_proto", false]], "to_pyarrow() (datafusion.record_batch.recordbatch method)": [[15, "datafusion.record_batch.RecordBatch.to_pyarrow", false]], "to_pyarrow() (datafusion.recordbatch method)": [[7, "datafusion.RecordBatch.to_pyarrow", false]], "to_pydict() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.to_pydict", false]], "to_pylist() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.to_pylist", false]], "to_substrait_plan() (datafusion.substrait.producer static method)": [[16, "datafusion.substrait.Producer.to_substrait_plan", false]], "to_timestamp() (in module datafusion.functions)": [[5, "datafusion.functions.to_timestamp", false]], "to_timestamp_micros() (in module datafusion.functions)": [[5, "datafusion.functions.to_timestamp_micros", false]], "to_timestamp_millis() (in module datafusion.functions)": [[5, "datafusion.functions.to_timestamp_millis", false]], "to_timestamp_nanos() (in module 
datafusion.functions)": [[5, "datafusion.functions.to_timestamp_nanos", false]], "to_timestamp_seconds() (in module datafusion.functions)": [[5, "datafusion.functions.to_timestamp_seconds", false]], "to_unixtime() (in module datafusion.functions)": [[5, "datafusion.functions.to_unixtime", false]], "to_variant() (datafusion.expr method)": [[7, "datafusion.Expr.to_variant", false]], "to_variant() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.to_variant", false]], "to_variant() (datafusion.logicalplan method)": [[7, "datafusion.LogicalPlan.to_variant", false]], "to_variant() (datafusion.plan.logicalplan method)": [[14, "datafusion.plan.LogicalPlan.to_variant", false]], "transactionaccessmode (in module datafusion.expr)": [[4, "datafusion.expr.TransactionAccessMode", false]], "transactionconclusion (in module datafusion.expr)": [[4, "datafusion.expr.TransactionConclusion", false]], "transactionend (in module datafusion.expr)": [[4, "datafusion.expr.TransactionEnd", false]], "transactionisolationlevel (in module datafusion.expr)": [[4, "datafusion.expr.TransactionIsolationLevel", false]], "transactionstart (in module datafusion.expr)": [[4, "datafusion.expr.TransactionStart", false]], "transform() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.transform", false]], "translate() (in module datafusion.functions)": [[5, "datafusion.functions.translate", false]], "trim() (datafusion.expr method)": [[7, "datafusion.Expr.trim", false]], "trim() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.trim", false]], "trim() (in module datafusion.functions)": [[5, "datafusion.functions.trim", false]], "trunc() (in module datafusion.functions)": [[5, "datafusion.functions.trunc", false]], "truncated_rows (datafusion.csvreadoptions attribute)": [[7, "datafusion.CsvReadOptions.truncated_rows", false]], "truncated_rows (datafusion.options.csvreadoptions attribute)": [[13, "datafusion.options.CsvReadOptions.truncated_rows", false]], 
"trycast (in module datafusion.expr)": [[4, "datafusion.expr.TryCast", false]], "types() (datafusion.expr method)": [[7, "datafusion.Expr.types", false]], "types() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.types", false]], "udaf (in module datafusion)": [[7, "datafusion.udaf", false]], "udaf (in module datafusion.user_defined)": [[18, "datafusion.user_defined.udaf", false]], "udaf() (datafusion.aggregateudf static method)": [[7, "datafusion.AggregateUDF.udaf", false]], "udaf() (datafusion.user_defined.aggregateudf static method)": [[18, "datafusion.user_defined.AggregateUDF.udaf", false]], "udf (in module datafusion)": [[7, "datafusion.udf", false]], "udf (in module datafusion.user_defined)": [[18, "datafusion.user_defined.udf", false]], "udf() (datafusion.scalarudf static method)": [[7, "datafusion.ScalarUDF.udf", false]], "udf() (datafusion.user_defined.scalarudf static method)": [[18, "datafusion.user_defined.ScalarUDF.udf", false]], "udtf (in module datafusion)": [[7, "datafusion.udtf", false]], "udtf (in module datafusion.user_defined)": [[18, "datafusion.user_defined.udtf", false]], "udtf() (datafusion.tablefunction static method)": [[7, "datafusion.TableFunction.udtf", false]], "udtf() (datafusion.user_defined.tablefunction static method)": [[18, "datafusion.user_defined.TableFunction.udtf", false]], "udwf (in module datafusion)": [[7, "datafusion.udwf", false]], "udwf (in module datafusion.user_defined)": [[18, "datafusion.user_defined.udwf", false]], "udwf() (datafusion.user_defined.windowudf static method)": [[18, "datafusion.user_defined.WindowUDF.udwf", false]], "udwf() (datafusion.windowudf static method)": [[7, "datafusion.WindowUDF.udwf", false]], "uncompressed (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.UNCOMPRESSED", false]], "union (in module datafusion.expr)": [[4, "datafusion.expr.Union", false]], "union() (datafusion.dataframe.dataframe method)": [[2, 
"datafusion.dataframe.DataFrame.union", false]], "union_distinct() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.union_distinct", false]], "unnest (in module datafusion.expr)": [[4, "datafusion.expr.Unnest", false]], "unnest_columns() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.unnest_columns", false]], "unnestexpr (in module datafusion.expr)": [[4, "datafusion.expr.UnnestExpr", false]], "unparser (class in datafusion.unparser)": [[17, "datafusion.unparser.Unparser", false]], "unparser (datafusion.unparser.unparser attribute)": [[17, "datafusion.unparser.Unparser.unparser", false]], "update() (datafusion.accumulator method)": [[7, "datafusion.Accumulator.update", false]], "update() (datafusion.user_defined.accumulator method)": [[18, "datafusion.user_defined.Accumulator.update", false]], "upper() (datafusion.expr method)": [[7, "datafusion.Expr.upper", false]], "upper() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.upper", false]], "upper() (in module datafusion.functions)": [[5, "datafusion.functions.upper", false]], "use_shared_styles (datafusion.dataframe_formatter.dataframehtmlformatter attribute)": [[3, "datafusion.dataframe_formatter.DataFrameHtmlFormatter.use_shared_styles", false]], "uses_window_frame() (datafusion.user_defined.windowevaluator method)": [[18, "datafusion.user_defined.WindowEvaluator.uses_window_frame", false]], "uuid() (in module datafusion.functions)": [[5, "datafusion.functions.uuid", false]], "values (in module datafusion.expr)": [[4, "datafusion.expr.Values", false]], "var() (in module datafusion.functions)": [[5, "datafusion.functions.var", false]], "var_pop() (in module datafusion.functions)": [[5, "datafusion.functions.var_pop", false]], "var_samp() (in module datafusion.functions)": [[5, "datafusion.functions.var_samp", false]], "var_sample() (in module datafusion.functions)": [[5, "datafusion.functions.var_sample", false]], "variant_name() 
(datafusion.expr method)": [[7, "datafusion.Expr.variant_name", false]], "variant_name() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.variant_name", false]], "volatile (datafusion.user_defined.volatility attribute)": [[18, "datafusion.user_defined.Volatility.Volatile", false]], "volatility (class in datafusion.user_defined)": [[18, "datafusion.user_defined.Volatility", false]], "when() (datafusion.expr.casebuilder method)": [[4, "datafusion.expr.CaseBuilder.when", false]], "when() (in module datafusion.functions)": [[5, "datafusion.functions.when", false]], "window (class in datafusion.expr)": [[4, "datafusion.expr.Window", false]], "window() (in module datafusion.functions)": [[5, "datafusion.functions.window", false]], "window_frame (datafusion.expr.windowframe attribute)": [[4, "datafusion.expr.WindowFrame.window_frame", false]], "window_frame (datafusion.windowframe attribute)": [[7, "datafusion.WindowFrame.window_frame", false]], "window_frame() (datafusion.expr method)": [[7, "datafusion.Expr.window_frame", false]], "window_frame() (datafusion.expr.expr method)": [[4, "datafusion.expr.Expr.window_frame", false]], "windowevaluator (class in datafusion.user_defined)": [[18, "datafusion.user_defined.WindowEvaluator", false]], "windowexpr (in module datafusion.expr)": [[4, "datafusion.expr.WindowExpr", false]], "windowframe (class in datafusion)": [[7, "datafusion.WindowFrame", false]], "windowframe (class in datafusion.expr)": [[4, "datafusion.expr.WindowFrame", false]], "windowframebound (class in datafusion.expr)": [[4, "datafusion.expr.WindowFrameBound", false]], "windowudf (class in datafusion)": [[7, "datafusion.WindowUDF", false]], "windowudf (class in datafusion.user_defined)": [[18, "datafusion.user_defined.WindowUDF", false]], "windowudfexportable (class in datafusion.user_defined)": [[18, "datafusion.user_defined.WindowUDFExportable", false]], "with_allow_ddl() (datafusion.context.sqloptions method)": [[1, 
"datafusion.context.SQLOptions.with_allow_ddl", false]], "with_allow_ddl() (datafusion.sqloptions method)": [[7, "datafusion.SQLOptions.with_allow_ddl", false]], "with_allow_dml() (datafusion.context.sqloptions method)": [[1, "datafusion.context.SQLOptions.with_allow_dml", false]], "with_allow_dml() (datafusion.sqloptions method)": [[7, "datafusion.SQLOptions.with_allow_dml", false]], "with_allow_statements() (datafusion.context.sqloptions method)": [[1, "datafusion.context.SQLOptions.with_allow_statements", false]], "with_allow_statements() (datafusion.sqloptions method)": [[7, "datafusion.SQLOptions.with_allow_statements", false]], "with_batch_size() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_batch_size", false]], "with_batch_size() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_batch_size", false]], "with_column() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.with_column", false]], "with_column_renamed() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.with_column_renamed", false]], "with_columns() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.with_columns", false]], "with_comment() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_comment", false]], "with_comment() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_comment", false]], "with_create_default_catalog_and_schema() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_create_default_catalog_and_schema", false]], "with_create_default_catalog_and_schema() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_create_default_catalog_and_schema", false]], "with_default_catalog_and_schema() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_default_catalog_and_schema", false]], 
"with_default_catalog_and_schema() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_default_catalog_and_schema", false]], "with_delimiter() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_delimiter", false]], "with_delimiter() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_delimiter", false]], "with_disk_manager_disabled() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_disk_manager_disabled", false]], "with_disk_manager_disabled() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_disk_manager_disabled", false]], "with_disk_manager_os() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_disk_manager_os", false]], "with_disk_manager_os() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_disk_manager_os", false]], "with_disk_manager_specified() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_disk_manager_specified", false]], "with_disk_manager_specified() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_disk_manager_specified", false]], "with_escape() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_escape", false]], "with_escape() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_escape", false]], "with_fair_spill_pool() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_fair_spill_pool", false]], "with_fair_spill_pool() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_fair_spill_pool", false]], "with_file_compression_type() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_file_compression_type", false]], "with_file_compression_type() (datafusion.options.csvreadoptions method)": 
[[13, "datafusion.options.CsvReadOptions.with_file_compression_type", false]], "with_file_extension() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_file_extension", false]], "with_file_extension() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_file_extension", false]], "with_file_sort_order() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_file_sort_order", false]], "with_file_sort_order() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_file_sort_order", false]], "with_greedy_memory_pool() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_greedy_memory_pool", false]], "with_greedy_memory_pool() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_greedy_memory_pool", false]], "with_has_header() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_has_header", false]], "with_has_header() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_has_header", false]], "with_information_schema() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_information_schema", false]], "with_information_schema() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_information_schema", false]], "with_logical_extension_codec() (datafusion.context.sessioncontext method)": [[1, "datafusion.context.SessionContext.with_logical_extension_codec", false]], "with_newlines_in_values() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_newlines_in_values", false]], "with_newlines_in_values() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_newlines_in_values", false]], "with_null_regex() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_null_regex", false]], "with_null_regex() 
(datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_null_regex", false]], "with_parquet_pruning() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_parquet_pruning", false]], "with_parquet_pruning() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_parquet_pruning", false]], "with_pretty() (datafusion.unparser.unparser method)": [[17, "datafusion.unparser.Unparser.with_pretty", false]], "with_quote() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_quote", false]], "with_quote() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_quote", false]], "with_repartition_aggregations() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_repartition_aggregations", false]], "with_repartition_aggregations() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_aggregations", false]], "with_repartition_file_min_size() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_repartition_file_min_size", false]], "with_repartition_file_min_size() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_file_min_size", false]], "with_repartition_file_scans() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_repartition_file_scans", false]], "with_repartition_file_scans() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_file_scans", false]], "with_repartition_joins() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_repartition_joins", false]], "with_repartition_joins() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_joins", false]], "with_repartition_sorts() (datafusion.context.sessionconfig method)": [[1, 
"datafusion.context.SessionConfig.with_repartition_sorts", false]], "with_repartition_sorts() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_sorts", false]], "with_repartition_windows() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_repartition_windows", false]], "with_repartition_windows() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_repartition_windows", false]], "with_schema() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_schema", false]], "with_schema() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_schema", false]], "with_schema_infer_max_records() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_schema_infer_max_records", false]], "with_schema_infer_max_records() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_schema_infer_max_records", false]], "with_table_partition_cols() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_table_partition_cols", false]], "with_table_partition_cols() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_table_partition_cols", false]], "with_target_partitions() (datafusion.context.sessionconfig method)": [[1, "datafusion.context.SessionConfig.with_target_partitions", false]], "with_target_partitions() (datafusion.sessionconfig method)": [[7, "datafusion.SessionConfig.with_target_partitions", false]], "with_temp_file_path() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_temp_file_path", false]], "with_temp_file_path() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_temp_file_path", false]], "with_terminator() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_terminator", false]], "with_terminator() 
(datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_terminator", false]], "with_truncated_rows() (datafusion.csvreadoptions method)": [[7, "datafusion.CsvReadOptions.with_truncated_rows", false]], "with_truncated_rows() (datafusion.options.csvreadoptions method)": [[13, "datafusion.options.CsvReadOptions.with_truncated_rows", false]], "with_unbounded_memory_pool() (datafusion.context.runtimeenvbuilder method)": [[1, "datafusion.context.RuntimeEnvBuilder.with_unbounded_memory_pool", false]], "with_unbounded_memory_pool() (datafusion.runtimeenvbuilder method)": [[7, "datafusion.RuntimeEnvBuilder.with_unbounded_memory_pool", false]], "write_batch_size (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.write_batch_size", false]], "write_batch_size (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.write_batch_size", false]], "write_csv() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.write_csv", false]], "write_json() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.write_json", false]], "write_parquet() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.write_parquet", false]], "write_parquet_with_options() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.write_parquet_with_options", false]], "write_table() (datafusion.dataframe.dataframe method)": [[2, "datafusion.dataframe.DataFrame.write_table", false]], "writer_version (datafusion.dataframe.parquetwriteroptions attribute)": [[2, "datafusion.dataframe.ParquetWriterOptions.writer_version", false]], "writer_version (datafusion.parquetwriteroptions attribute)": [[7, "datafusion.ParquetWriterOptions.writer_version", false]], "zstd (datafusion.dataframe.compression attribute)": [[2, "datafusion.dataframe.Compression.ZSTD", false]]}, "objects": {"": [[7, 0, 0, "-", "datafusion"]], 
"datafusion": [[7, 1, 1, "", "Accumulator"], [7, 1, 1, "", "AggregateUDF"], [7, 1, 1, "", "Catalog"], [7, 1, 1, "", "CsvReadOptions"], [7, 4, 1, "", "DFSchema"], [7, 1, 1, "", "DataFrameWriteOptions"], [7, 1, 1, "", "Database"], [7, 1, 1, "", "ExecutionPlan"], [7, 1, 1, "", "Expr"], [7, 1, 1, "", "InsertOp"], [7, 1, 1, "", "LogicalPlan"], [7, 1, 1, "", "ParquetColumnOptions"], [7, 1, 1, "", "ParquetWriterOptions"], [7, 1, 1, "", "RecordBatch"], [7, 1, 1, "", "RecordBatchStream"], [7, 1, 1, "", "RuntimeEnvBuilder"], [7, 1, 1, "", "SQLOptions"], [7, 1, 1, "", "ScalarUDF"], [7, 1, 1, "", "SessionConfig"], [7, 1, 1, "", "Table"], [7, 1, 1, "", "TableFunction"], [7, 1, 1, "", "WindowFrame"], [7, 1, 1, "", "WindowUDF"], [0, 0, 0, "-", "catalog"], [7, 4, 1, "", "col"], [7, 4, 1, "", "column"], [7, 6, 1, "", "configure_formatter"], [1, 0, 0, "-", "context"], [2, 0, 0, "-", "dataframe"], [3, 0, 0, "-", "dataframe_formatter"], [4, 0, 0, "-", "expr"], [5, 0, 0, "-", "functions"], [6, 0, 0, "-", "html_formatter"], [9, 0, 0, "-", "input"], [11, 0, 0, "-", "io"], [7, 6, 1, "", "lit"], [7, 6, 1, "", "literal"], [12, 0, 0, "-", "object_store"], [13, 0, 0, "-", "options"], [14, 0, 0, "-", "plan"], [7, 6, 1, "", "read_avro"], [7, 6, 1, "", "read_csv"], [7, 6, 1, "", "read_json"], [7, 6, 1, "", "read_parquet"], [15, 0, 0, "-", "record_batch"], [16, 0, 0, "-", "substrait"], [7, 4, 1, "", "udaf"], [7, 4, 1, "", "udf"], [7, 4, 1, "", "udtf"], [7, 4, 1, "", "udwf"], [17, 0, 0, "-", "unparser"], [18, 0, 0, "-", "user_defined"]], "datafusion.Accumulator": [[7, 2, 1, "", "evaluate"], [7, 2, 1, "", "merge"], [7, 2, 1, "", "state"], [7, 2, 1, "", "update"]], "datafusion.AggregateUDF": [[7, 2, 1, "", "__call__"], [7, 2, 1, "", "__repr__"], [7, 3, 1, "", "_udaf"], [7, 2, 1, "", "from_pycapsule"], [7, 2, 1, "", "udaf"]], "datafusion.Catalog": [[7, 2, 1, "", "__repr__"], [7, 3, 1, "", "catalog"], [7, 2, 1, "", "database"], [7, 2, 1, "", "deregister_schema"], [7, 2, 1, "", "memory_catalog"], [7, 
2, 1, "", "names"], [7, 2, 1, "", "register_schema"], [7, 2, 1, "", "schema"], [7, 2, 1, "", "schema_names"]], "datafusion.CsvReadOptions": [[7, 3, 1, "", "comment"], [7, 3, 1, "", "delimiter"], [7, 3, 1, "", "escape"], [7, 3, 1, "", "file_compression_type"], [7, 3, 1, "", "file_extension"], [7, 3, 1, "", "file_sort_order"], [7, 3, 1, "", "has_header"], [7, 3, 1, "", "newlines_in_values"], [7, 3, 1, "", "null_regex"], [7, 3, 1, "", "quote"], [7, 3, 1, "", "schema"], [7, 3, 1, "", "schema_infer_max_records"], [7, 3, 1, "", "table_partition_cols"], [7, 3, 1, "", "terminator"], [7, 2, 1, "", "to_inner"], [7, 3, 1, "", "truncated_rows"], [7, 2, 1, "", "with_comment"], [7, 2, 1, "", "with_delimiter"], [7, 2, 1, "", "with_escape"], [7, 2, 1, "", "with_file_compression_type"], [7, 2, 1, "", "with_file_extension"], [7, 2, 1, "", "with_file_sort_order"], [7, 2, 1, "", "with_has_header"], [7, 2, 1, "", "with_newlines_in_values"], [7, 2, 1, "", "with_null_regex"], [7, 2, 1, "", "with_quote"], [7, 2, 1, "", "with_schema"], [7, 2, 1, "", "with_schema_infer_max_records"], [7, 2, 1, "", "with_table_partition_cols"], [7, 2, 1, "", "with_terminator"], [7, 2, 1, "", "with_truncated_rows"]], "datafusion.DataFrameWriteOptions": [[7, 3, 1, "", "_raw_write_options"]], "datafusion.ExecutionPlan": [[7, 2, 1, "", "__repr__"], [7, 3, 1, "", "_raw_plan"], [7, 2, 1, "", "children"], [7, 2, 1, "", "display"], [7, 2, 1, "", "display_indent"], [7, 2, 1, "", "from_proto"], [7, 5, 1, "", "partition_count"], [7, 2, 1, "", "to_proto"]], "datafusion.Expr": [[7, 2, 1, "", "__add__"], [7, 2, 1, "", "__and__"], [7, 2, 1, "", "__eq__"], [7, 2, 1, "", "__ge__"], [7, 2, 1, "", "__getitem__"], [7, 2, 1, "", "__gt__"], [7, 2, 1, "", "__invert__"], [7, 2, 1, "", "__le__"], [7, 2, 1, "", "__lt__"], [7, 2, 1, "", "__mod__"], [7, 2, 1, "", "__mul__"], [7, 2, 1, "", "__ne__"], [7, 2, 1, "", "__or__"], [7, 3, 1, "", "__radd__"], [7, 3, 1, "", "__rand__"], [7, 2, 1, "", "__repr__"], [7, 2, 1, "", "__richcmp__"], 
[7, 3, 1, "", "__rmod__"], [7, 3, 1, "", "__rmul__"], [7, 3, 1, "", "__ror__"], [7, 3, 1, "", "__rsub__"], [7, 3, 1, "", "__rtruediv__"], [7, 2, 1, "", "__sub__"], [7, 2, 1, "", "__truediv__"], [7, 3, 1, "", "_to_pyarrow_types"], [7, 2, 1, "", "abs"], [7, 2, 1, "", "acos"], [7, 2, 1, "", "acosh"], [7, 2, 1, "", "alias"], [7, 2, 1, "", "array_dims"], [7, 2, 1, "", "array_distinct"], [7, 2, 1, "", "array_empty"], [7, 2, 1, "", "array_length"], [7, 2, 1, "", "array_ndims"], [7, 2, 1, "", "array_pop_back"], [7, 2, 1, "", "array_pop_front"], [7, 2, 1, "", "arrow_typeof"], [7, 2, 1, "", "ascii"], [7, 2, 1, "", "asin"], [7, 2, 1, "", "asinh"], [7, 2, 1, "", "atan"], [7, 2, 1, "", "atanh"], [7, 2, 1, "", "between"], [7, 2, 1, "", "bit_length"], [7, 2, 1, "", "btrim"], [7, 2, 1, "", "canonical_name"], [7, 2, 1, "", "cardinality"], [7, 2, 1, "", "cast"], [7, 2, 1, "", "cbrt"], [7, 2, 1, "", "ceil"], [7, 2, 1, "", "char_length"], [7, 2, 1, "", "character_length"], [7, 2, 1, "", "chr"], [7, 2, 1, "", "column"], [7, 2, 1, "", "column_name"], [7, 2, 1, "", "cos"], [7, 2, 1, "", "cosh"], [7, 2, 1, "", "cot"], [7, 2, 1, "", "degrees"], [7, 2, 1, "", "display_name"], [7, 2, 1, "", "distinct"], [7, 2, 1, "", "empty"], [7, 2, 1, "", "exp"], [7, 3, 1, "", "expr"], [7, 2, 1, "", "factorial"], [7, 2, 1, "", "fill_nan"], [7, 2, 1, "", "fill_null"], [7, 2, 1, "", "filter"], [7, 2, 1, "", "flatten"], [7, 2, 1, "", "floor"], [7, 2, 1, "", "from_unixtime"], [7, 2, 1, "", "initcap"], [7, 2, 1, "", "is_not_null"], [7, 2, 1, "", "is_null"], [7, 2, 1, "", "isnan"], [7, 2, 1, "", "iszero"], [7, 2, 1, "", "length"], [7, 2, 1, "", "list_dims"], [7, 2, 1, "", "list_distinct"], [7, 2, 1, "", "list_length"], [7, 2, 1, "", "list_ndims"], [7, 2, 1, "", "literal"], [7, 2, 1, "", "literal_with_metadata"], [7, 2, 1, "", "ln"], [7, 2, 1, "", "log10"], [7, 2, 1, "", "log2"], [7, 2, 1, "", "lower"], [7, 2, 1, "", "ltrim"], [7, 2, 1, "", "md5"], [7, 2, 1, "", "null_treatment"], [7, 2, 1, "", "octet_length"], 
[7, 2, 1, "", "order_by"], [7, 2, 1, "", "over"], [7, 2, 1, "", "partition_by"], [7, 2, 1, "", "python_value"], [7, 2, 1, "", "radians"], [7, 2, 1, "", "reverse"], [7, 2, 1, "", "rex_call_operands"], [7, 2, 1, "", "rex_call_operator"], [7, 2, 1, "", "rex_type"], [7, 2, 1, "", "rtrim"], [7, 2, 1, "", "schema_name"], [7, 2, 1, "", "sha224"], [7, 2, 1, "", "sha256"], [7, 2, 1, "", "sha384"], [7, 2, 1, "", "sha512"], [7, 2, 1, "", "signum"], [7, 2, 1, "", "sin"], [7, 2, 1, "", "sinh"], [7, 2, 1, "", "sort"], [7, 2, 1, "", "sqrt"], [7, 2, 1, "", "string_literal"], [7, 2, 1, "", "tan"], [7, 2, 1, "", "tanh"], [7, 2, 1, "", "to_hex"], [7, 2, 1, "", "to_variant"], [7, 2, 1, "", "trim"], [7, 2, 1, "", "types"], [7, 2, 1, "", "upper"], [7, 2, 1, "", "variant_name"], [7, 2, 1, "", "window_frame"]], "datafusion.InsertOp": [[7, 3, 1, "", "APPEND"], [7, 3, 1, "", "OVERWRITE"], [7, 3, 1, "", "REPLACE"]], "datafusion.LogicalPlan": [[7, 2, 1, "", "__eq__"], [7, 2, 1, "", "__repr__"], [7, 3, 1, "", "_raw_plan"], [7, 2, 1, "", "display"], [7, 2, 1, "", "display_graphviz"], [7, 2, 1, "", "display_indent"], [7, 2, 1, "", "display_indent_schema"], [7, 2, 1, "", "from_proto"], [7, 2, 1, "", "inputs"], [7, 2, 1, "", "to_proto"], [7, 2, 1, "", "to_variant"]], "datafusion.ParquetColumnOptions": [[7, 3, 1, "", "bloom_filter_enabled"], [7, 3, 1, "", "bloom_filter_fpp"], [7, 3, 1, "", "bloom_filter_ndv"], [7, 3, 1, "", "compression"], [7, 3, 1, "", "dictionary_enabled"], [7, 3, 1, "", "encoding"], [7, 3, 1, "", "statistics_enabled"]], "datafusion.ParquetWriterOptions": [[7, 3, 1, "", "allow_single_file_parallelism"], [7, 3, 1, "", "bloom_filter_fpp"], [7, 3, 1, "", "bloom_filter_ndv"], [7, 3, 1, "", "bloom_filter_on_write"], [7, 3, 1, "", "column_index_truncate_length"], [7, 3, 1, "", "column_specific_options"], [7, 3, 1, "", "created_by"], [7, 3, 1, "", "data_page_row_count_limit"], [7, 3, 1, "", "data_pagesize_limit"], [7, 3, 1, "", "dictionary_enabled"], [7, 3, 1, "", 
"dictionary_page_size_limit"], [7, 3, 1, "", "encoding"], [7, 3, 1, "", "max_row_group_size"], [7, 3, 1, "", "maximum_buffered_record_batches_per_stream"], [7, 3, 1, "", "maximum_parallel_row_group_writers"], [7, 3, 1, "", "skip_arrow_metadata"], [7, 3, 1, "", "statistics_enabled"], [7, 3, 1, "", "statistics_truncate_length"], [7, 3, 1, "", "write_batch_size"], [7, 3, 1, "", "writer_version"]], "datafusion.RecordBatch": [[7, 2, 1, "", "__arrow_c_array__"], [7, 3, 1, "", "record_batch"], [7, 2, 1, "", "to_pyarrow"]], "datafusion.RecordBatchStream": [[7, 2, 1, "", "__aiter__"], [7, 2, 1, "", "__anext__"], [7, 2, 1, "", "__iter__"], [7, 2, 1, "", "__next__"], [7, 2, 1, "", "next"], [7, 3, 1, "", "rbs"]], "datafusion.RuntimeEnvBuilder": [[7, 3, 1, "", "config_internal"], [7, 2, 1, "", "with_disk_manager_disabled"], [7, 2, 1, "", "with_disk_manager_os"], [7, 2, 1, "", "with_disk_manager_specified"], [7, 2, 1, "", "with_fair_spill_pool"], [7, 2, 1, "", "with_greedy_memory_pool"], [7, 2, 1, "", "with_temp_file_path"], [7, 2, 1, "", "with_unbounded_memory_pool"]], "datafusion.SQLOptions": [[7, 3, 1, "", "options_internal"], [7, 2, 1, "", "with_allow_ddl"], [7, 2, 1, "", "with_allow_dml"], [7, 2, 1, "", "with_allow_statements"]], "datafusion.ScalarUDF": [[7, 2, 1, "", "__call__"], [7, 2, 1, "", "__repr__"], [7, 3, 1, "", "_udf"], [7, 2, 1, "", "from_pycapsule"], [7, 2, 1, "", "udf"]], "datafusion.SessionConfig": [[7, 3, 1, "", "config_internal"], [7, 2, 1, "", "set"], [7, 2, 1, "", "with_batch_size"], [7, 2, 1, "", "with_create_default_catalog_and_schema"], [7, 2, 1, "", "with_default_catalog_and_schema"], [7, 2, 1, "", "with_information_schema"], [7, 2, 1, "", "with_parquet_pruning"], [7, 2, 1, "", "with_repartition_aggregations"], [7, 2, 1, "", "with_repartition_file_min_size"], [7, 2, 1, "", "with_repartition_file_scans"], [7, 2, 1, "", "with_repartition_joins"], [7, 2, 1, "", "with_repartition_sorts"], [7, 2, 1, "", "with_repartition_windows"], [7, 2, 1, "", 
"with_target_partitions"]], "datafusion.Table": [[7, 2, 1, "", "__repr__"], [7, 3, 1, "", "__slots__"], [7, 3, 1, "", "_inner"], [7, 2, 1, "", "from_dataset"], [7, 5, 1, "", "kind"], [7, 5, 1, "", "schema"]], "datafusion.TableFunction": [[7, 2, 1, "", "__call__"], [7, 2, 1, "", "__repr__"], [7, 2, 1, "", "_create_table_udf"], [7, 2, 1, "", "_create_table_udf_decorator"], [7, 3, 1, "", "_udtf"], [7, 2, 1, "", "udtf"]], "datafusion.WindowFrame": [[7, 2, 1, "", "__repr__"], [7, 2, 1, "", "get_frame_units"], [7, 2, 1, "", "get_lower_bound"], [7, 2, 1, "", "get_upper_bound"], [7, 3, 1, "", "window_frame"]], "datafusion.WindowUDF": [[7, 2, 1, "", "__call__"], [7, 2, 1, "", "__repr__"], [7, 2, 1, "", "_create_window_udf"], [7, 2, 1, "", "_create_window_udf_decorator"], [7, 2, 1, "", "_get_default_name"], [7, 2, 1, "", "_normalize_input_types"], [7, 3, 1, "", "_udwf"], [7, 2, 1, "", "from_pycapsule"], [7, 2, 1, "", "udwf"]], "datafusion.catalog": [[0, 1, 1, "", "Catalog"], [0, 1, 1, "", "CatalogList"], [0, 1, 1, "", "CatalogProvider"], [0, 1, 1, "", "CatalogProviderList"], [0, 1, 1, "", "Schema"], [0, 1, 1, "", "SchemaProvider"], [0, 1, 1, "", "Table"]], "datafusion.catalog.Catalog": [[0, 2, 1, "", "__repr__"], [0, 3, 1, "", "catalog"], [0, 2, 1, "", "database"], [0, 2, 1, "", "deregister_schema"], [0, 2, 1, "", "memory_catalog"], [0, 2, 1, "", "names"], [0, 2, 1, "", "register_schema"], [0, 2, 1, "", "schema"], [0, 2, 1, "", "schema_names"]], "datafusion.catalog.CatalogList": [[0, 2, 1, "", "__repr__"], [0, 2, 1, "", "catalog"], [0, 3, 1, "", "catalog_list"], [0, 2, 1, "", "catalog_names"], [0, 2, 1, "", "memory_catalog"], [0, 2, 1, "", "names"], [0, 2, 1, "", "register_catalog"]], "datafusion.catalog.CatalogProvider": [[0, 2, 1, "", "deregister_schema"], [0, 2, 1, "", "register_schema"], [0, 2, 1, "", "schema"], [0, 2, 1, "", "schema_names"]], "datafusion.catalog.CatalogProviderList": [[0, 2, 1, "", "catalog"], [0, 2, 1, "", "catalog_names"], [0, 2, 1, "", 
"register_catalog"]], "datafusion.catalog.Schema": [[0, 2, 1, "", "__repr__"], [0, 3, 1, "", "_raw_schema"], [0, 2, 1, "", "deregister_table"], [0, 2, 1, "", "memory_schema"], [0, 2, 1, "", "names"], [0, 2, 1, "", "register_table"], [0, 2, 1, "", "table"], [0, 2, 1, "", "table_exist"], [0, 2, 1, "", "table_names"]], "datafusion.catalog.SchemaProvider": [[0, 2, 1, "", "deregister_table"], [0, 2, 1, "", "owner_name"], [0, 2, 1, "", "register_table"], [0, 2, 1, "", "table"], [0, 2, 1, "", "table_exist"], [0, 2, 1, "", "table_names"]], "datafusion.catalog.Table": [[0, 2, 1, "", "__repr__"], [0, 3, 1, "", "__slots__"], [0, 3, 1, "", "_inner"], [0, 2, 1, "", "from_dataset"], [0, 5, 1, "", "kind"], [0, 5, 1, "", "schema"]], "datafusion.context": [[1, 1, 1, "", "ArrowArrayExportable"], [1, 1, 1, "", "ArrowStreamExportable"], [1, 1, 1, "", "RuntimeConfig"], [1, 1, 1, "", "RuntimeEnvBuilder"], [1, 1, 1, "", "SQLOptions"], [1, 1, 1, "", "SessionConfig"], [1, 1, 1, "", "SessionContext"], [1, 1, 1, "", "TableProviderExportable"]], "datafusion.context.ArrowArrayExportable": [[1, 2, 1, "", "__arrow_c_array__"]], "datafusion.context.ArrowStreamExportable": [[1, 2, 1, "", "__arrow_c_stream__"]], "datafusion.context.RuntimeEnvBuilder": [[1, 3, 1, "", "config_internal"], [1, 2, 1, "", "with_disk_manager_disabled"], [1, 2, 1, "", "with_disk_manager_os"], [1, 2, 1, "", "with_disk_manager_specified"], [1, 2, 1, "", "with_fair_spill_pool"], [1, 2, 1, "", "with_greedy_memory_pool"], [1, 2, 1, "", "with_temp_file_path"], [1, 2, 1, "", "with_unbounded_memory_pool"]], "datafusion.context.SQLOptions": [[1, 3, 1, "", "options_internal"], [1, 2, 1, "", "with_allow_ddl"], [1, 2, 1, "", "with_allow_dml"], [1, 2, 1, "", "with_allow_statements"]], "datafusion.context.SessionConfig": [[1, 3, 1, "", "config_internal"], [1, 2, 1, "", "set"], [1, 2, 1, "", "with_batch_size"], [1, 2, 1, "", "with_create_default_catalog_and_schema"], [1, 2, 1, "", "with_default_catalog_and_schema"], [1, 2, 1, "", 
"with_information_schema"], [1, 2, 1, "", "with_parquet_pruning"], [1, 2, 1, "", "with_repartition_aggregations"], [1, 2, 1, "", "with_repartition_file_min_size"], [1, 2, 1, "", "with_repartition_file_scans"], [1, 2, 1, "", "with_repartition_joins"], [1, 2, 1, "", "with_repartition_sorts"], [1, 2, 1, "", "with_repartition_windows"], [1, 2, 1, "", "with_target_partitions"]], "datafusion.context.SessionContext": [[1, 2, 1, "", "__datafusion_logical_extension_codec__"], [1, 2, 1, "", "__datafusion_task_context_provider__"], [1, 2, 1, "", "__repr__"], [1, 2, 1, "", "_convert_file_sort_order"], [1, 2, 1, "", "_convert_table_partition_cols"], [1, 2, 1, "", "catalog"], [1, 2, 1, "", "catalog_names"], [1, 2, 1, "", "create_dataframe"], [1, 2, 1, "", "create_dataframe_from_logical_plan"], [1, 3, 1, "", "ctx"], [1, 2, 1, "", "deregister_table"], [1, 2, 1, "", "empty_table"], [1, 2, 1, "", "enable_url_table"], [1, 2, 1, "", "execute"], [1, 2, 1, "", "from_arrow"], [1, 2, 1, "", "from_arrow_table"], [1, 2, 1, "", "from_pandas"], [1, 2, 1, "", "from_polars"], [1, 2, 1, "", "from_pydict"], [1, 2, 1, "", "from_pylist"], [1, 2, 1, "", "global_ctx"], [1, 2, 1, "", "read_avro"], [1, 2, 1, "", "read_csv"], [1, 2, 1, "", "read_json"], [1, 2, 1, "", "read_parquet"], [1, 2, 1, "", "read_table"], [1, 2, 1, "", "register_avro"], [1, 2, 1, "", "register_catalog_provider"], [1, 2, 1, "", "register_catalog_provider_list"], [1, 2, 1, "", "register_csv"], [1, 2, 1, "", "register_dataset"], [1, 2, 1, "", "register_json"], [1, 2, 1, "", "register_listing_table"], [1, 2, 1, "", "register_object_store"], [1, 2, 1, "", "register_parquet"], [1, 2, 1, "", "register_record_batches"], [1, 2, 1, "", "register_table"], [1, 2, 1, "", "register_table_provider"], [1, 2, 1, "", "register_udaf"], [1, 2, 1, "", "register_udf"], [1, 2, 1, "", "register_udtf"], [1, 2, 1, "", "register_udwf"], [1, 2, 1, "", "register_view"], [1, 2, 1, "", "session_id"], [1, 2, 1, "", "sql"], [1, 2, 1, "", "sql_with_options"], [1, 
2, 1, "", "table"], [1, 2, 1, "", "table_exist"], [1, 2, 1, "", "with_logical_extension_codec"]], "datafusion.context.TableProviderExportable": [[1, 2, 1, "", "__datafusion_table_provider__"]], "datafusion.dataframe": [[2, 1, 1, "", "Compression"], [2, 1, 1, "", "DataFrame"], [2, 1, 1, "", "DataFrameWriteOptions"], [2, 1, 1, "", "InsertOp"], [2, 1, 1, "", "ParquetColumnOptions"], [2, 1, 1, "", "ParquetWriterOptions"]], "datafusion.dataframe.Compression": [[2, 3, 1, "", "BROTLI"], [2, 3, 1, "", "GZIP"], [2, 3, 1, "", "LZ4"], [2, 3, 1, "", "LZ4_RAW"], [2, 3, 1, "", "SNAPPY"], [2, 3, 1, "", "UNCOMPRESSED"], [2, 3, 1, "", "ZSTD"], [2, 2, 1, "", "from_str"], [2, 2, 1, "", "get_default_level"]], "datafusion.dataframe.DataFrame": [[2, 2, 1, "", "__aiter__"], [2, 2, 1, "", "__arrow_c_stream__"], [2, 2, 1, "", "__getitem__"], [2, 2, 1, "", "__iter__"], [2, 2, 1, "", "__repr__"], [2, 2, 1, "", "_repr_html_"], [2, 2, 1, "", "aggregate"], [2, 2, 1, "", "cache"], [2, 2, 1, "", "cast"], [2, 2, 1, "", "collect"], [2, 2, 1, "", "collect_column"], [2, 2, 1, "", "collect_partitioned"], [2, 2, 1, "", "count"], [2, 2, 1, "", "default_str_repr"], [2, 2, 1, "", "describe"], [2, 3, 1, "", "df"], [2, 2, 1, "", "distinct"], [2, 2, 1, "", "drop"], [2, 2, 1, "", "except_all"], [2, 2, 1, "", "execute_stream"], [2, 2, 1, "", "execute_stream_partitioned"], [2, 2, 1, "", "execution_plan"], [2, 2, 1, "", "explain"], [2, 2, 1, "", "fill_null"], [2, 2, 1, "", "filter"], [2, 2, 1, "", "head"], [2, 2, 1, "", "intersect"], [2, 2, 1, "", "into_view"], [2, 2, 1, "", "join"], [2, 2, 1, "", "join_on"], [2, 2, 1, "", "limit"], [2, 2, 1, "", "logical_plan"], [2, 2, 1, "", "optimized_logical_plan"], [2, 2, 1, "", "parse_sql_expr"], [2, 2, 1, "", "repartition"], [2, 2, 1, "", "repartition_by_hash"], [2, 2, 1, "", "schema"], [2, 2, 1, "", "select"], [2, 2, 1, "", "select_columns"], [2, 2, 1, "", "select_exprs"], [2, 2, 1, "", "show"], [2, 2, 1, "", "sort"], [2, 2, 1, "", "tail"], [2, 2, 1, "", 
"to_arrow_table"], [2, 2, 1, "", "to_pandas"], [2, 2, 1, "", "to_polars"], [2, 2, 1, "", "to_pydict"], [2, 2, 1, "", "to_pylist"], [2, 2, 1, "", "transform"], [2, 2, 1, "", "union"], [2, 2, 1, "", "union_distinct"], [2, 2, 1, "", "unnest_columns"], [2, 2, 1, "", "with_column"], [2, 2, 1, "", "with_column_renamed"], [2, 2, 1, "", "with_columns"], [2, 2, 1, "", "write_csv"], [2, 2, 1, "", "write_json"], [2, 2, 1, "", "write_parquet"], [2, 2, 1, "", "write_parquet_with_options"], [2, 2, 1, "", "write_table"]], "datafusion.dataframe.DataFrameWriteOptions": [[2, 3, 1, "", "_raw_write_options"]], "datafusion.dataframe.InsertOp": [[2, 3, 1, "", "APPEND"], [2, 3, 1, "", "OVERWRITE"], [2, 3, 1, "", "REPLACE"]], "datafusion.dataframe.ParquetColumnOptions": [[2, 3, 1, "", "bloom_filter_enabled"], [2, 3, 1, "", "bloom_filter_fpp"], [2, 3, 1, "", "bloom_filter_ndv"], [2, 3, 1, "", "compression"], [2, 3, 1, "", "dictionary_enabled"], [2, 3, 1, "", "encoding"], [2, 3, 1, "", "statistics_enabled"]], "datafusion.dataframe.ParquetWriterOptions": [[2, 3, 1, "", "allow_single_file_parallelism"], [2, 3, 1, "", "bloom_filter_fpp"], [2, 3, 1, "", "bloom_filter_ndv"], [2, 3, 1, "", "bloom_filter_on_write"], [2, 3, 1, "", "column_index_truncate_length"], [2, 3, 1, "", "column_specific_options"], [2, 3, 1, "", "created_by"], [2, 3, 1, "", "data_page_row_count_limit"], [2, 3, 1, "", "data_pagesize_limit"], [2, 3, 1, "", "dictionary_enabled"], [2, 3, 1, "", "dictionary_page_size_limit"], [2, 3, 1, "", "encoding"], [2, 3, 1, "", "max_row_group_size"], [2, 3, 1, "", "maximum_buffered_record_batches_per_stream"], [2, 3, 1, "", "maximum_parallel_row_group_writers"], [2, 3, 1, "", "skip_arrow_metadata"], [2, 3, 1, "", "statistics_enabled"], [2, 3, 1, "", "statistics_truncate_length"], [2, 3, 1, "", "write_batch_size"], [2, 3, 1, "", "writer_version"]], "datafusion.dataframe_formatter": [[3, 1, 1, "", "CellFormatter"], [3, 1, 1, "", "DataFrameHtmlFormatter"], [3, 1, 1, "", "DefaultStyleProvider"], 
[3, 1, 1, "", "FormatterManager"], [3, 1, 1, "", "StyleProvider"], [3, 6, 1, "", "_refresh_formatter_reference"], [3, 6, 1, "", "_validate_bool"], [3, 6, 1, "", "_validate_formatter_parameters"], [3, 6, 1, "", "_validate_positive_int"], [3, 6, 1, "", "configure_formatter"], [3, 6, 1, "", "get_formatter"], [3, 6, 1, "", "reset_formatter"], [3, 6, 1, "", "set_formatter"]], "datafusion.dataframe_formatter.CellFormatter": [[3, 2, 1, "", "__call__"]], "datafusion.dataframe_formatter.DataFrameHtmlFormatter": [[3, 2, 1, "", "_build_expandable_cell"], [3, 2, 1, "", "_build_html_footer"], [3, 2, 1, "", "_build_html_header"], [3, 2, 1, "", "_build_regular_cell"], [3, 2, 1, "", "_build_table_body"], [3, 2, 1, "", "_build_table_container_start"], [3, 2, 1, "", "_build_table_header"], [3, 3, 1, "", "_custom_cell_builder"], [3, 3, 1, "", "_custom_header_builder"], [3, 2, 1, "", "_format_cell_value"], [3, 2, 1, "", "_get_cell_value"], [3, 2, 1, "", "_get_default_css"], [3, 2, 1, "", "_get_javascript"], [3, 3, 1, "", "_max_rows"], [3, 3, 1, "", "_type_formatters"], [3, 3, 1, "", "custom_css"], [3, 3, 1, "", "enable_cell_expansion"], [3, 2, 1, "", "format_html"], [3, 2, 1, "", "format_str"], [3, 3, 1, "", "max_cell_length"], [3, 3, 1, "", "max_height"], [3, 3, 1, "", "max_memory_bytes"], [3, 5, 1, "", "max_rows"], [3, 3, 1, "", "max_width"], [3, 3, 1, "", "min_rows"], [3, 2, 1, "", "register_formatter"], [3, 5, 1, "", "repr_rows"], [3, 2, 1, "", "set_custom_cell_builder"], [3, 2, 1, "", "set_custom_header_builder"], [3, 3, 1, "", "show_truncation_message"], [3, 3, 1, "", "style_provider"], [3, 3, 1, "", "use_shared_styles"]], "datafusion.dataframe_formatter.DefaultStyleProvider": [[3, 2, 1, "", "get_cell_style"], [3, 2, 1, "", "get_header_style"]], "datafusion.dataframe_formatter.FormatterManager": [[3, 3, 1, "", "_default_formatter"], [3, 2, 1, "", "get_formatter"], [3, 2, 1, "", "set_formatter"]], "datafusion.dataframe_formatter.StyleProvider": [[3, 2, 1, "", "get_cell_style"], 
[3, 2, 1, "", "get_header_style"]], "datafusion.expr": [[4, 4, 1, "", "Aggregate"], [4, 4, 1, "", "AggregateFunction"], [4, 4, 1, "", "Alias"], [4, 4, 1, "", "Analyze"], [4, 4, 1, "", "Between"], [4, 4, 1, "", "BinaryExpr"], [4, 4, 1, "", "Case"], [4, 1, 1, "", "CaseBuilder"], [4, 4, 1, "", "Cast"], [4, 4, 1, "", "Column"], [4, 4, 1, "", "CopyTo"], [4, 4, 1, "", "CreateCatalog"], [4, 4, 1, "", "CreateCatalogSchema"], [4, 4, 1, "", "CreateExternalTable"], [4, 4, 1, "", "CreateFunction"], [4, 4, 1, "", "CreateFunctionBody"], [4, 4, 1, "", "CreateIndex"], [4, 4, 1, "", "CreateMemoryTable"], [4, 4, 1, "", "CreateView"], [4, 4, 1, "", "Deallocate"], [4, 4, 1, "", "DescribeTable"], [4, 4, 1, "", "Distinct"], [4, 4, 1, "", "DmlStatement"], [4, 4, 1, "", "DropCatalogSchema"], [4, 4, 1, "", "DropFunction"], [4, 4, 1, "", "DropTable"], [4, 4, 1, "", "DropView"], [4, 4, 1, "", "EXPR_TYPE_ERROR"], [4, 4, 1, "", "EmptyRelation"], [4, 4, 1, "", "Execute"], [4, 4, 1, "", "Exists"], [4, 4, 1, "", "Explain"], [4, 1, 1, "", "Expr"], [4, 4, 1, "", "Extension"], [4, 4, 1, "", "FileType"], [4, 4, 1, "", "Filter"], [4, 4, 1, "", "GroupingSet"], [4, 4, 1, "", "ILike"], [4, 4, 1, "", "InList"], [4, 4, 1, "", "InSubquery"], [4, 4, 1, "", "IsFalse"], [4, 4, 1, "", "IsNotFalse"], [4, 4, 1, "", "IsNotNull"], [4, 4, 1, "", "IsNotTrue"], [4, 4, 1, "", "IsNotUnknown"], [4, 4, 1, "", "IsNull"], [4, 4, 1, "", "IsTrue"], [4, 4, 1, "", "IsUnknown"], [4, 4, 1, "", "Join"], [4, 4, 1, "", "JoinConstraint"], [4, 4, 1, "", "JoinType"], [4, 4, 1, "", "Like"], [4, 4, 1, "", "Limit"], [4, 4, 1, "", "Literal"], [4, 4, 1, "", "Negative"], [4, 4, 1, "", "Not"], [4, 4, 1, "", "OperateFunctionArg"], [4, 4, 1, "", "Partitioning"], [4, 4, 1, "", "Placeholder"], [4, 4, 1, "", "Prepare"], [4, 4, 1, "", "Projection"], [4, 4, 1, "", "RecursiveQuery"], [4, 4, 1, "", "Repartition"], [4, 4, 1, "", "ScalarSubquery"], [4, 4, 1, "", "ScalarVariable"], [4, 4, 1, "", "SetVariable"], [4, 4, 1, "", "SimilarTo"], [4, 4, 1, "", 
"Sort"], [4, 1, 1, "", "SortExpr"], [4, 4, 1, "", "SortKey"], [4, 4, 1, "", "Subquery"], [4, 4, 1, "", "SubqueryAlias"], [4, 4, 1, "", "TableScan"], [4, 4, 1, "", "TransactionAccessMode"], [4, 4, 1, "", "TransactionConclusion"], [4, 4, 1, "", "TransactionEnd"], [4, 4, 1, "", "TransactionIsolationLevel"], [4, 4, 1, "", "TransactionStart"], [4, 4, 1, "", "TryCast"], [4, 4, 1, "", "Union"], [4, 4, 1, "", "Unnest"], [4, 4, 1, "", "UnnestExpr"], [4, 4, 1, "", "Values"], [4, 1, 1, "", "Window"], [4, 4, 1, "", "WindowExpr"], [4, 1, 1, "", "WindowFrame"], [4, 1, 1, "", "WindowFrameBound"], [4, 6, 1, "", "ensure_expr"], [4, 6, 1, "", "ensure_expr_list"]], "datafusion.expr.CaseBuilder": [[4, 3, 1, "", "case_builder"], [4, 2, 1, "", "end"], [4, 2, 1, "", "otherwise"], [4, 2, 1, "", "when"]], "datafusion.expr.Expr": [[4, 2, 1, "", "__add__"], [4, 2, 1, "", "__and__"], [4, 2, 1, "", "__eq__"], [4, 2, 1, "", "__ge__"], [4, 2, 1, "", "__getitem__"], [4, 2, 1, "", "__gt__"], [4, 2, 1, "", "__invert__"], [4, 2, 1, "", "__le__"], [4, 2, 1, "", "__lt__"], [4, 2, 1, "", "__mod__"], [4, 2, 1, "", "__mul__"], [4, 2, 1, "", "__ne__"], [4, 2, 1, "", "__or__"], [4, 3, 1, "", "__radd__"], [4, 3, 1, "", "__rand__"], [4, 2, 1, "", "__repr__"], [4, 2, 1, "", "__richcmp__"], [4, 3, 1, "", "__rmod__"], [4, 3, 1, "", "__rmul__"], [4, 3, 1, "", "__ror__"], [4, 3, 1, "", "__rsub__"], [4, 3, 1, "", "__rtruediv__"], [4, 2, 1, "", "__sub__"], [4, 2, 1, "", "__truediv__"], [4, 3, 1, "", "_to_pyarrow_types"], [4, 2, 1, "", "abs"], [4, 2, 1, "", "acos"], [4, 2, 1, "", "acosh"], [4, 2, 1, "", "alias"], [4, 2, 1, "", "array_dims"], [4, 2, 1, "", "array_distinct"], [4, 2, 1, "", "array_empty"], [4, 2, 1, "", "array_length"], [4, 2, 1, "", "array_ndims"], [4, 2, 1, "", "array_pop_back"], [4, 2, 1, "", "array_pop_front"], [4, 2, 1, "", "arrow_typeof"], [4, 2, 1, "", "ascii"], [4, 2, 1, "", "asin"], [4, 2, 1, "", "asinh"], [4, 2, 1, "", "atan"], [4, 2, 1, "", "atanh"], [4, 2, 1, "", "between"], [4, 2, 1, "", 
"bit_length"], [4, 2, 1, "", "btrim"], [4, 2, 1, "", "canonical_name"], [4, 2, 1, "", "cardinality"], [4, 2, 1, "", "cast"], [4, 2, 1, "", "cbrt"], [4, 2, 1, "", "ceil"], [4, 2, 1, "", "char_length"], [4, 2, 1, "", "character_length"], [4, 2, 1, "", "chr"], [4, 2, 1, "", "column"], [4, 2, 1, "", "column_name"], [4, 2, 1, "", "cos"], [4, 2, 1, "", "cosh"], [4, 2, 1, "", "cot"], [4, 2, 1, "", "degrees"], [4, 2, 1, "", "display_name"], [4, 2, 1, "", "distinct"], [4, 2, 1, "", "empty"], [4, 2, 1, "", "exp"], [4, 3, 1, "", "expr"], [4, 2, 1, "", "factorial"], [4, 2, 1, "", "fill_nan"], [4, 2, 1, "", "fill_null"], [4, 2, 1, "", "filter"], [4, 2, 1, "", "flatten"], [4, 2, 1, "", "floor"], [4, 2, 1, "", "from_unixtime"], [4, 2, 1, "", "initcap"], [4, 2, 1, "", "is_not_null"], [4, 2, 1, "", "is_null"], [4, 2, 1, "", "isnan"], [4, 2, 1, "", "iszero"], [4, 2, 1, "", "length"], [4, 2, 1, "", "list_dims"], [4, 2, 1, "", "list_distinct"], [4, 2, 1, "", "list_length"], [4, 2, 1, "", "list_ndims"], [4, 2, 1, "", "literal"], [4, 2, 1, "", "literal_with_metadata"], [4, 2, 1, "", "ln"], [4, 2, 1, "", "log10"], [4, 2, 1, "", "log2"], [4, 2, 1, "", "lower"], [4, 2, 1, "", "ltrim"], [4, 2, 1, "", "md5"], [4, 2, 1, "", "null_treatment"], [4, 2, 1, "", "octet_length"], [4, 2, 1, "", "order_by"], [4, 2, 1, "", "over"], [4, 2, 1, "", "partition_by"], [4, 2, 1, "", "python_value"], [4, 2, 1, "", "radians"], [4, 2, 1, "", "reverse"], [4, 2, 1, "", "rex_call_operands"], [4, 2, 1, "", "rex_call_operator"], [4, 2, 1, "", "rex_type"], [4, 2, 1, "", "rtrim"], [4, 2, 1, "", "schema_name"], [4, 2, 1, "", "sha224"], [4, 2, 1, "", "sha256"], [4, 2, 1, "", "sha384"], [4, 2, 1, "", "sha512"], [4, 2, 1, "", "signum"], [4, 2, 1, "", "sin"], [4, 2, 1, "", "sinh"], [4, 2, 1, "", "sort"], [4, 2, 1, "", "sqrt"], [4, 2, 1, "", "string_literal"], [4, 2, 1, "", "tan"], [4, 2, 1, "", "tanh"], [4, 2, 1, "", "to_hex"], [4, 2, 1, "", "to_variant"], [4, 2, 1, "", "trim"], [4, 2, 1, "", "types"], [4, 2, 1, "", 
"upper"], [4, 2, 1, "", "variant_name"], [4, 2, 1, "", "window_frame"]], "datafusion.expr.SortExpr": [[4, 2, 1, "", "__repr__"], [4, 2, 1, "", "ascending"], [4, 2, 1, "", "expr"], [4, 2, 1, "", "nulls_first"], [4, 3, 1, "", "raw_sort"]], "datafusion.expr.Window": [[4, 3, 1, "", "_null_treatment"], [4, 3, 1, "", "_order_by"], [4, 3, 1, "", "_partition_by"], [4, 3, 1, "", "_window_frame"]], "datafusion.expr.WindowFrame": [[4, 2, 1, "", "__repr__"], [4, 2, 1, "", "get_frame_units"], [4, 2, 1, "", "get_lower_bound"], [4, 2, 1, "", "get_upper_bound"], [4, 3, 1, "", "window_frame"]], "datafusion.expr.WindowFrameBound": [[4, 3, 1, "", "frame_bound"], [4, 2, 1, "", "get_offset"], [4, 2, 1, "", "is_current_row"], [4, 2, 1, "", "is_following"], [4, 2, 1, "", "is_preceding"], [4, 2, 1, "", "is_unbounded"]], "datafusion.functions": [[5, 6, 1, "", "abs"], [5, 6, 1, "", "acos"], [5, 6, 1, "", "acosh"], [5, 6, 1, "", "alias"], [5, 6, 1, "", "approx_distinct"], [5, 6, 1, "", "approx_median"], [5, 6, 1, "", "approx_percentile_cont"], [5, 6, 1, "", "approx_percentile_cont_with_weight"], [5, 6, 1, "", "array"], [5, 6, 1, "", "array_agg"], [5, 6, 1, "", "array_append"], [5, 6, 1, "", "array_cat"], [5, 6, 1, "", "array_concat"], [5, 6, 1, "", "array_dims"], [5, 6, 1, "", "array_distinct"], [5, 6, 1, "", "array_element"], [5, 6, 1, "", "array_empty"], [5, 6, 1, "", "array_except"], [5, 6, 1, "", "array_extract"], [5, 6, 1, "", "array_has"], [5, 6, 1, "", "array_has_all"], [5, 6, 1, "", "array_has_any"], [5, 6, 1, "", "array_indexof"], [5, 6, 1, "", "array_intersect"], [5, 6, 1, "", "array_join"], [5, 6, 1, "", "array_length"], [5, 6, 1, "", "array_ndims"], [5, 6, 1, "", "array_pop_back"], [5, 6, 1, "", "array_pop_front"], [5, 6, 1, "", "array_position"], [5, 6, 1, "", "array_positions"], [5, 6, 1, "", "array_prepend"], [5, 6, 1, "", "array_push_back"], [5, 6, 1, "", "array_push_front"], [5, 6, 1, "", "array_remove"], [5, 6, 1, "", "array_remove_all"], [5, 6, 1, "", "array_remove_n"], 
[5, 6, 1, "", "array_repeat"], [5, 6, 1, "", "array_replace"], [5, 6, 1, "", "array_replace_all"], [5, 6, 1, "", "array_replace_n"], [5, 6, 1, "", "array_resize"], [5, 6, 1, "", "array_slice"], [5, 6, 1, "", "array_sort"], [5, 6, 1, "", "array_to_string"], [5, 6, 1, "", "array_union"], [5, 6, 1, "", "arrow_cast"], [5, 6, 1, "", "arrow_typeof"], [5, 6, 1, "", "ascii"], [5, 6, 1, "", "asin"], [5, 6, 1, "", "asinh"], [5, 6, 1, "", "atan"], [5, 6, 1, "", "atan2"], [5, 6, 1, "", "atanh"], [5, 6, 1, "", "avg"], [5, 6, 1, "", "bit_and"], [5, 6, 1, "", "bit_length"], [5, 6, 1, "", "bit_or"], [5, 6, 1, "", "bit_xor"], [5, 6, 1, "", "bool_and"], [5, 6, 1, "", "bool_or"], [5, 6, 1, "", "btrim"], [5, 6, 1, "", "cardinality"], [5, 6, 1, "", "case"], [5, 6, 1, "", "cbrt"], [5, 6, 1, "", "ceil"], [5, 6, 1, "", "char_length"], [5, 6, 1, "", "character_length"], [5, 6, 1, "", "chr"], [5, 6, 1, "", "coalesce"], [5, 6, 1, "", "col"], [5, 6, 1, "", "concat"], [5, 6, 1, "", "concat_ws"], [5, 6, 1, "", "corr"], [5, 6, 1, "", "cos"], [5, 6, 1, "", "cosh"], [5, 6, 1, "", "cot"], [5, 6, 1, "", "count"], [5, 6, 1, "", "count_star"], [5, 6, 1, "", "covar"], [5, 6, 1, "", "covar_pop"], [5, 6, 1, "", "covar_samp"], [5, 6, 1, "", "cume_dist"], [5, 6, 1, "", "current_date"], [5, 6, 1, "", "current_time"], [5, 6, 1, "", "date_bin"], [5, 6, 1, "", "date_part"], [5, 6, 1, "", "date_trunc"], [5, 6, 1, "", "datepart"], [5, 6, 1, "", "datetrunc"], [5, 6, 1, "", "decode"], [5, 6, 1, "", "degrees"], [5, 6, 1, "", "dense_rank"], [5, 6, 1, "", "digest"], [5, 6, 1, "", "empty"], [5, 6, 1, "", "encode"], [5, 6, 1, "", "ends_with"], [5, 6, 1, "", "exp"], [5, 6, 1, "", "extract"], [5, 6, 1, "", "factorial"], [5, 6, 1, "", "find_in_set"], [5, 6, 1, "", "first_value"], [5, 6, 1, "", "flatten"], [5, 6, 1, "", "floor"], [5, 6, 1, "", "from_unixtime"], [5, 6, 1, "", "gcd"], [5, 6, 1, "", "in_list"], [5, 6, 1, "", "initcap"], [5, 6, 1, "", "isnan"], [5, 6, 1, "", "iszero"], [5, 6, 1, "", "lag"], [5, 6, 1, "", 
"last_value"], [5, 6, 1, "", "lcm"], [5, 6, 1, "", "lead"], [5, 6, 1, "", "left"], [5, 6, 1, "", "length"], [5, 6, 1, "", "levenshtein"], [5, 6, 1, "", "list_append"], [5, 6, 1, "", "list_cat"], [5, 6, 1, "", "list_concat"], [5, 6, 1, "", "list_dims"], [5, 6, 1, "", "list_distinct"], [5, 6, 1, "", "list_element"], [5, 6, 1, "", "list_except"], [5, 6, 1, "", "list_extract"], [5, 6, 1, "", "list_indexof"], [5, 6, 1, "", "list_intersect"], [5, 6, 1, "", "list_join"], [5, 6, 1, "", "list_length"], [5, 6, 1, "", "list_ndims"], [5, 6, 1, "", "list_position"], [5, 6, 1, "", "list_positions"], [5, 6, 1, "", "list_prepend"], [5, 6, 1, "", "list_push_back"], [5, 6, 1, "", "list_push_front"], [5, 6, 1, "", "list_remove"], [5, 6, 1, "", "list_remove_all"], [5, 6, 1, "", "list_remove_n"], [5, 6, 1, "", "list_repeat"], [5, 6, 1, "", "list_replace"], [5, 6, 1, "", "list_replace_all"], [5, 6, 1, "", "list_replace_n"], [5, 6, 1, "", "list_resize"], [5, 6, 1, "", "list_slice"], [5, 6, 1, "", "list_sort"], [5, 6, 1, "", "list_to_string"], [5, 6, 1, "", "list_union"], [5, 6, 1, "", "ln"], [5, 6, 1, "", "log"], [5, 6, 1, "", "log10"], [5, 6, 1, "", "log2"], [5, 6, 1, "", "lower"], [5, 6, 1, "", "lpad"], [5, 6, 1, "", "ltrim"], [5, 6, 1, "", "make_array"], [5, 6, 1, "", "make_date"], [5, 6, 1, "", "make_list"], [5, 6, 1, "", "max"], [5, 6, 1, "", "md5"], [5, 6, 1, "", "mean"], [5, 6, 1, "", "median"], [5, 6, 1, "", "min"], [5, 6, 1, "", "named_struct"], [5, 6, 1, "", "nanvl"], [5, 6, 1, "", "now"], [5, 6, 1, "", "nth_value"], [5, 6, 1, "", "ntile"], [5, 6, 1, "", "nullif"], [5, 6, 1, "", "nvl"], [5, 6, 1, "", "octet_length"], [5, 6, 1, "", "order_by"], [5, 6, 1, "", "overlay"], [5, 6, 1, "", "percent_rank"], [5, 6, 1, "", "pi"], [5, 6, 1, "", "pow"], [5, 6, 1, "", "power"], [5, 6, 1, "", "radians"], [5, 6, 1, "", "random"], [5, 6, 1, "", "range"], [5, 6, 1, "", "rank"], [5, 6, 1, "", "regexp_count"], [5, 6, 1, "", "regexp_instr"], [5, 6, 1, "", "regexp_like"], [5, 6, 1, "", 
"regexp_match"], [5, 6, 1, "", "regexp_replace"], [5, 6, 1, "", "regr_avgx"], [5, 6, 1, "", "regr_avgy"], [5, 6, 1, "", "regr_count"], [5, 6, 1, "", "regr_intercept"], [5, 6, 1, "", "regr_r2"], [5, 6, 1, "", "regr_slope"], [5, 6, 1, "", "regr_sxx"], [5, 6, 1, "", "regr_sxy"], [5, 6, 1, "", "regr_syy"], [5, 6, 1, "", "repeat"], [5, 6, 1, "", "replace"], [5, 6, 1, "", "reverse"], [5, 6, 1, "", "right"], [5, 6, 1, "", "round"], [5, 6, 1, "", "row_number"], [5, 6, 1, "", "rpad"], [5, 6, 1, "", "rtrim"], [5, 6, 1, "", "sha224"], [5, 6, 1, "", "sha256"], [5, 6, 1, "", "sha384"], [5, 6, 1, "", "sha512"], [5, 6, 1, "", "signum"], [5, 6, 1, "", "sin"], [5, 6, 1, "", "sinh"], [5, 6, 1, "", "split_part"], [5, 6, 1, "", "sqrt"], [5, 6, 1, "", "starts_with"], [5, 6, 1, "", "stddev"], [5, 6, 1, "", "stddev_pop"], [5, 6, 1, "", "stddev_samp"], [5, 6, 1, "", "string_agg"], [5, 6, 1, "", "strpos"], [5, 6, 1, "", "struct"], [5, 6, 1, "", "substr"], [5, 6, 1, "", "substr_index"], [5, 6, 1, "", "substring"], [5, 6, 1, "", "sum"], [5, 6, 1, "", "tan"], [5, 6, 1, "", "tanh"], [5, 6, 1, "", "to_hex"], [5, 6, 1, "", "to_timestamp"], [5, 6, 1, "", "to_timestamp_micros"], [5, 6, 1, "", "to_timestamp_millis"], [5, 6, 1, "", "to_timestamp_nanos"], [5, 6, 1, "", "to_timestamp_seconds"], [5, 6, 1, "", "to_unixtime"], [5, 6, 1, "", "translate"], [5, 6, 1, "", "trim"], [5, 6, 1, "", "trunc"], [5, 6, 1, "", "upper"], [5, 6, 1, "", "uuid"], [5, 6, 1, "", "var"], [5, 6, 1, "", "var_pop"], [5, 6, 1, "", "var_samp"], [5, 6, 1, "", "var_sample"], [5, 6, 1, "", "when"], [5, 6, 1, "", "window"]], "datafusion.input": [[9, 1, 1, "", "LocationInputPlugin"], [8, 0, 0, "-", "base"], [10, 0, 0, "-", "location"]], "datafusion.input.LocationInputPlugin": [[9, 2, 1, "", "build_table"], [9, 2, 1, "", "is_correct_input"]], "datafusion.input.base": [[8, 1, 1, "", "BaseInputSource"]], "datafusion.input.base.BaseInputSource": [[8, 2, 1, "", "build_table"], [8, 2, 1, "", "is_correct_input"]], 
"datafusion.input.location": [[10, 1, 1, "", "LocationInputPlugin"]], "datafusion.input.location.LocationInputPlugin": [[10, 2, 1, "", "build_table"], [10, 2, 1, "", "is_correct_input"]], "datafusion.io": [[11, 6, 1, "", "read_avro"], [11, 6, 1, "", "read_csv"], [11, 6, 1, "", "read_json"], [11, 6, 1, "", "read_parquet"]], "datafusion.object_store": [[12, 4, 1, "", "AmazonS3"], [12, 4, 1, "", "GoogleCloud"], [12, 4, 1, "", "Http"], [12, 4, 1, "", "LocalFileSystem"], [12, 4, 1, "", "MicrosoftAzure"]], "datafusion.options": [[13, 1, 1, "", "CsvReadOptions"]], "datafusion.options.CsvReadOptions": [[13, 3, 1, "", "comment"], [13, 3, 1, "", "delimiter"], [13, 3, 1, "", "escape"], [13, 3, 1, "", "file_compression_type"], [13, 3, 1, "", "file_extension"], [13, 3, 1, "", "file_sort_order"], [13, 3, 1, "", "has_header"], [13, 3, 1, "", "newlines_in_values"], [13, 3, 1, "", "null_regex"], [13, 3, 1, "", "quote"], [13, 3, 1, "", "schema"], [13, 3, 1, "", "schema_infer_max_records"], [13, 3, 1, "", "table_partition_cols"], [13, 3, 1, "", "terminator"], [13, 2, 1, "", "to_inner"], [13, 3, 1, "", "truncated_rows"], [13, 2, 1, "", "with_comment"], [13, 2, 1, "", "with_delimiter"], [13, 2, 1, "", "with_escape"], [13, 2, 1, "", "with_file_compression_type"], [13, 2, 1, "", "with_file_extension"], [13, 2, 1, "", "with_file_sort_order"], [13, 2, 1, "", "with_has_header"], [13, 2, 1, "", "with_newlines_in_values"], [13, 2, 1, "", "with_null_regex"], [13, 2, 1, "", "with_quote"], [13, 2, 1, "", "with_schema"], [13, 2, 1, "", "with_schema_infer_max_records"], [13, 2, 1, "", "with_table_partition_cols"], [13, 2, 1, "", "with_terminator"], [13, 2, 1, "", "with_truncated_rows"]], "datafusion.plan": [[14, 1, 1, "", "ExecutionPlan"], [14, 1, 1, "", "LogicalPlan"]], "datafusion.plan.ExecutionPlan": [[14, 2, 1, "", "__repr__"], [14, 3, 1, "", "_raw_plan"], [14, 2, 1, "", "children"], [14, 2, 1, "", "display"], [14, 2, 1, "", "display_indent"], [14, 2, 1, "", "from_proto"], [14, 5, 1, "", 
"partition_count"], [14, 2, 1, "", "to_proto"]], "datafusion.plan.LogicalPlan": [[14, 2, 1, "", "__eq__"], [14, 2, 1, "", "__repr__"], [14, 3, 1, "", "_raw_plan"], [14, 2, 1, "", "display"], [14, 2, 1, "", "display_graphviz"], [14, 2, 1, "", "display_indent"], [14, 2, 1, "", "display_indent_schema"], [14, 2, 1, "", "from_proto"], [14, 2, 1, "", "inputs"], [14, 2, 1, "", "to_proto"], [14, 2, 1, "", "to_variant"]], "datafusion.record_batch": [[15, 1, 1, "", "RecordBatch"], [15, 1, 1, "", "RecordBatchStream"]], "datafusion.record_batch.RecordBatch": [[15, 2, 1, "", "__arrow_c_array__"], [15, 3, 1, "", "record_batch"], [15, 2, 1, "", "to_pyarrow"]], "datafusion.record_batch.RecordBatchStream": [[15, 2, 1, "", "__aiter__"], [15, 2, 1, "", "__anext__"], [15, 2, 1, "", "__iter__"], [15, 2, 1, "", "__next__"], [15, 2, 1, "", "next"], [15, 3, 1, "", "rbs"]], "datafusion.substrait": [[16, 1, 1, "", "Consumer"], [16, 1, 1, "", "Plan"], [16, 1, 1, "", "Producer"], [16, 1, 1, "", "Serde"]], "datafusion.substrait.Consumer": [[16, 2, 1, "", "from_substrait_plan"]], "datafusion.substrait.Plan": [[16, 2, 1, "", "encode"], [16, 2, 1, "", "from_json"], [16, 3, 1, "", "plan_internal"], [16, 2, 1, "", "to_json"]], "datafusion.substrait.Producer": [[16, 2, 1, "", "to_substrait_plan"]], "datafusion.substrait.Serde": [[16, 2, 1, "", "deserialize"], [16, 2, 1, "", "deserialize_bytes"], [16, 2, 1, "", "serialize"], [16, 2, 1, "", "serialize_bytes"], [16, 2, 1, "", "serialize_to_plan"]], "datafusion.unparser": [[17, 1, 1, "", "Dialect"], [17, 1, 1, "", "Unparser"]], "datafusion.unparser.Dialect": [[17, 2, 1, "", "default"], [17, 3, 1, "", "dialect"], [17, 2, 1, "", "duckdb"], [17, 2, 1, "", "mysql"], [17, 2, 1, "", "postgres"], [17, 2, 1, "", "sqlite"]], "datafusion.unparser.Unparser": [[17, 2, 1, "", "plan_to_sql"], [17, 3, 1, "", "unparser"], [17, 2, 1, "", "with_pretty"]], "datafusion.user_defined": [[18, 1, 1, "", "Accumulator"], [18, 1, 1, "", "AggregateUDF"], [18, 1, 1, "", 
"AggregateUDFExportable"], [18, 1, 1, "", "ScalarUDF"], [18, 1, 1, "", "ScalarUDFExportable"], [18, 1, 1, "", "TableFunction"], [18, 1, 1, "", "Volatility"], [18, 1, 1, "", "WindowEvaluator"], [18, 1, 1, "", "WindowUDF"], [18, 1, 1, "", "WindowUDFExportable"], [18, 4, 1, "", "_R"], [18, 6, 1, "", "_is_pycapsule"], [18, 6, 1, "", "data_type_or_field_to_field"], [18, 6, 1, "", "data_types_or_fields_to_field_list"], [18, 4, 1, "", "udaf"], [18, 4, 1, "", "udf"], [18, 4, 1, "", "udtf"], [18, 4, 1, "", "udwf"]], "datafusion.user_defined.Accumulator": [[18, 2, 1, "", "evaluate"], [18, 2, 1, "", "merge"], [18, 2, 1, "", "state"], [18, 2, 1, "", "update"]], "datafusion.user_defined.AggregateUDF": [[18, 2, 1, "", "__call__"], [18, 2, 1, "", "__repr__"], [18, 3, 1, "", "_udaf"], [18, 2, 1, "", "from_pycapsule"], [18, 2, 1, "", "udaf"]], "datafusion.user_defined.AggregateUDFExportable": [[18, 2, 1, "", "__datafusion_aggregate_udf__"]], "datafusion.user_defined.ScalarUDF": [[18, 2, 1, "", "__call__"], [18, 2, 1, "", "__repr__"], [18, 3, 1, "", "_udf"], [18, 2, 1, "", "from_pycapsule"], [18, 2, 1, "", "udf"]], "datafusion.user_defined.ScalarUDFExportable": [[18, 2, 1, "", "__datafusion_scalar_udf__"]], "datafusion.user_defined.TableFunction": [[18, 2, 1, "", "__call__"], [18, 2, 1, "", "__repr__"], [18, 2, 1, "", "_create_table_udf"], [18, 2, 1, "", "_create_table_udf_decorator"], [18, 3, 1, "", "_udtf"], [18, 2, 1, "", "udtf"]], "datafusion.user_defined.Volatility": [[18, 3, 1, "", "Immutable"], [18, 3, 1, "", "Stable"], [18, 3, 1, "", "Volatile"], [18, 2, 1, "", "__str__"]], "datafusion.user_defined.WindowEvaluator": [[18, 2, 1, "", "evaluate"], [18, 2, 1, "", "evaluate_all"], [18, 2, 1, "", "evaluate_all_with_rank"], [18, 2, 1, "", "get_range"], [18, 2, 1, "", "include_rank"], [18, 2, 1, "", "is_causal"], [18, 2, 1, "", "memoize"], [18, 2, 1, "", "supports_bounded_execution"], [18, 2, 1, "", "uses_window_frame"]], "datafusion.user_defined.WindowUDF": [[18, 2, 1, "", 
"__call__"], [18, 2, 1, "", "__repr__"], [18, 2, 1, "", "_create_window_udf"], [18, 2, 1, "", "_create_window_udf_decorator"], [18, 2, 1, "", "_get_default_name"], [18, 2, 1, "", "_normalize_input_types"], [18, 3, 1, "", "_udwf"], [18, 2, 1, "", "from_pycapsule"], [18, 2, 1, "", "udwf"]], "datafusion.user_defined.WindowUDFExportable": [[18, 2, 1, "", "__datafusion_window_udf__"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "data", "Python data"], "5": ["py", "property", "Python property"], "6": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:data", "5": "py:property", "6": "py:function"}, "terms": {"": [1, 2, 3, 4, 5, 7, 13, 18, 20, 21, 26, 27, 29, 31, 32, 33, 34, 35, 36], "0": [2, 4, 5, 7, 18, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 45, 46], "007bff": 37, "01": [23, 27, 30], "01t00": 27, "03": 27, "038": 34, "04023": 5, "05": [27, 30], "06": [27, 30], "08": 27, "09": [23, 27], "1": [2, 4, 5, 7, 18, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 46], "10": [2, 3, 4, 5, 7, 18, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 46], "100": [3, 5, 22, 23, 24, 33, 35, 36, 37, 38, 46], "1000": [1, 3, 7, 11, 13, 25, 37], "10000000": 34, "1024": [1, 2, 3, 7, 37], "103": [22, 35, 38], "104": [22, 35, 38, 46], "1048576": [2, 7], "105": [22, 24, 35, 38], "107": [24, 34], "109": [22, 35, 38, 46], "11": [22, 23, 24, 26, 27, 29, 31, 33, 35, 38], "110": 24, "111": [22, 35, 38, 46], "112": 24, "115": [22, 24, 35, 38], "12": [21, 22, 23, 24, 26, 27, 29, 31, 33, 35, 38], "120": [5, 22, 24, 35, 38], "121": 24, "122": [22, 35, 38], "123": [22, 35, 38, 46], "12371": 5, "125": [5, 24, 27, 46], "128": [4, 5, 7], "12t09": 27, "13": [22, 24, 26, 27, 29, 31, 33, 35, 38], "130": [22, 35, 38, 46], "135": [22, 35, 
38], "136": 24, "14": [22, 23, 24, 26, 27, 30, 31, 35, 38], "140": 24, "145": [22, 24, 35, 38], "149": 46, "15": [22, 23, 24, 26, 27, 30, 31, 33, 35, 38], "150": [22, 33, 35, 38], "158": 46, "159": [22, 35, 38, 46], "16": [23, 24, 26, 27, 31, 34], "160": 46, "161": 46, "162": 46, "163": 46, "165": [27, 46], "17": [23, 24, 26, 27, 31, 33], "18": [23, 24, 26, 27, 31, 33], "19": [24, 26, 27, 46], "190": 46, "1902": 5, "1921": 27, "195": [22, 35, 38], "1970": 27, "1m": [2, 7], "1px": 37, "2": [2, 3, 4, 5, 7, 18, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 38, 39, 46], "20": [2, 5, 7, 18, 22, 24, 26, 27, 32, 33, 35, 37, 38, 46], "200": 46, "20000": [2, 7], "2021": 23, "2026": 27, "205": [22, 35, 38], "2097152": [3, 37], "21": [23, 24, 26, 27, 33, 36], "22": [2, 26, 35], "223": 46, "224": [4, 5, 7], "229": 46, "23": [23, 25, 26, 33, 46], "23076923076923": 24, "2345": 5, "24": [23, 26], "25": [3, 5, 22, 24, 26, 27, 33, 35, 36, 37, 38], "256": [4, 5, 7], "25806451612904": 24, "26": [26, 33], "27": 26, "28": [23, 26, 33], "28571428571429": 24, "29": 26, "2mb": [3, 37], "3": [2, 5, 7, 18, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 38, 39, 46], "30": [22, 24, 26, 32, 33, 35, 38], "300": [3, 37], "309": [22, 27, 35, 38], "31": 26, "314": [22, 27, 35, 38], "318": [22, 27, 35, 38], "32": 26, "33": [23, 26], "333333333333332": 33, "333333333333336": 24, "34": [26, 27], "35": [22, 23, 24, 26, 30, 33, 35, 38], "36": [23, 26], "37": 26, "378": 25, "38": 26, "382": 25, "384": [4, 5, 7], "39": [22, 26, 35, 38, 46], "395": [22, 35, 38], "3rd": 5, "3x": 34, "4": [2, 5, 18, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 46], "40": [5, 21, 22, 24, 26, 32, 35, 38], "405": [22, 27, 35, 38], "41": 26, "42": [24, 26, 33, 35], "42857142857143": 33, "43": [22, 26, 33, 35, 38, 45], "44": [22, 26, 35, 38], "45": [22, 24, 26, 27, 33, 35, 38], "4579": 27, "46": [24, 26], "47": 24, "4732": 27, "48": [22, 24, 35, 38], "49": [22, 26, 35, 38], "495": [22, 35, 38], 
"4mb": 37, "5": [2, 5, 18, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 38, 39, 46], "50": [3, 5, 7, 22, 24, 32, 35, 37, 38], "500": [3, 7], "5000000": 34, "51": [23, 24, 29, 46], "512": [4, 5, 7], "5129": 27, "52": [22, 24, 31, 35, 38], "525": [22, 27, 35, 38], "53": [24, 27], "530": [22, 35, 38], "534": [22, 27, 35, 38], "54": [24, 26, 27], "55": [22, 24, 33, 35, 38], "5580536720887": 25, "56": [24, 27], "569": 25, "58": [22, 24, 35, 38], "5811388300841898": 25, "59": [22, 35, 38], "6": [22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 36, 38, 39, 46], "60": [5, 22, 24, 33, 35, 38], "615": 25, "62": [22, 35, 38], "625": [22, 27, 35, 38], "63": [22, 24, 35, 38], "630": [22, 35, 38], "634": [22, 27, 35, 38], "64": [2, 7, 21, 22, 33, 35, 38], "65": [22, 24, 26, 30, 33, 35, 38], "66": [23, 24], "666666666666668": 33, "66666666666667": 24, "666667": 5, "67": [24, 26], "7": [22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 38, 39, 46], "70": [22, 24, 33, 35, 38], "71": [24, 26], "72": 24, "73": 23, "732": 27, "75": [5, 22, 24, 35, 38, 46], "757149654": 27, "76": 33, "77777777777777": 24, "78": [22, 23, 35, 38, 46], "785714285714285": 24, "78571428571429": 24, "79": [22, 23, 35, 38], "8": [22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 38, 39, 46], "80": [5, 22, 24, 33, 35, 38], "81": 23, "82": [22, 35, 38, 46], "83": [22, 24, 35, 38, 46], "833333333333336": 24, "84": [22, 33, 35, 38, 46], "85": [22, 24, 35, 38], "855": 27, "86": 24, "86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac": [4, 7], "887": 25, "88888888888889": 24, "8px": 37, "9": [5, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 38, 39, 46], "90": [5, 22, 24, 33, 35, 38], "91": 24, "92": 23, "939": 25, "94": [23, 30], "95": [23, 24, 33, 46], "96": [23, 24], "972": 27, "98": [24, 46], "A": [0, 1, 2, 4, 5, 7, 8, 14, 16, 18, 23, 25, 29, 33, 35, 36, 41, 45, 47], "AND": [2, 4, 5, 7, 18], "AS": 2, "As": [5, 7, 18, 20, 26, 30, 31, 35], "At": [5, 20], "BY": 18, "Be": 2, "By": [2, 7, 13, 20, 21, 24, 35], "For": [1, 2, 5, 16, 
17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 31, 34, 36, 37, 38, 47], "INTO": [1, 7], "If": [0, 1, 2, 3, 4, 5, 7, 8, 11, 13, 18, 20, 21, 24, 27, 29, 31, 33, 35, 38, 41, 45, 46], "In": [7, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 37, 38, 39, 46], "It": [1, 2, 3, 4, 5, 7, 18, 20, 22, 23, 24, 26, 29, 44], "Its": 22, "No": [2, 7], "Not": [2, 4, 7], "OR": [4, 5, 7], "On": [2, 20, 35], "One": [2, 4, 20, 24, 33, 35], "Or": [34, 37], "That": [4, 7], "The": [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 14, 16, 18, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 39, 46, 47], "Then": 31, "There": [2, 5, 7, 27, 31, 35], "These": [2, 5, 7, 15, 18, 24, 26, 31, 34, 36, 37, 46], "To": [2, 5, 20, 21, 26, 27, 30, 31, 33, 34, 35, 36, 38, 39, 45, 46, 47], "Will": [2, 37], "With": [4, 7, 36, 38, 39], "_": 20, "__add__": [4, 7], "__aiter__": [2, 7, 15], "__and__": [4, 7], "__anext__": [7, 15], "__arrow_c_array__": [1, 7, 15, 39], "__arrow_c_stream__": [1, 2, 36, 39], "__call__": [3, 7, 18], "__datafusion_aggregate_udf__": 18, "__datafusion_catalog_provider__": 47, "__datafusion_logical_extension_codec__": [1, 47], "__datafusion_scalar_udf__": 18, "__datafusion_table_function__": 31, "__datafusion_table_provider__": [1, 20, 45], "__datafusion_task_context_provider__": 1, "__datafusion_window_udf__": 18, "__eq__": [4, 7, 14], "__ge__": [4, 7], "__getitem__": [2, 4, 7], "__gt__": [4, 7], "__init__": [7, 18, 31], "__invert__": [4, 7], "__iter__": [2, 7, 15], "__le__": [4, 7], "__lt__": [4, 7], "__mod__": [4, 7], "__mul__": [4, 7], "__ne__": [4, 7], "__next__": [7, 15], "__or__": [4, 7], "__radd__": [4, 7], "__rand__": [4, 7], "__repr__": [0, 1, 2, 3, 4, 7, 14, 18, 37], "__richcmp__": [4, 7], "__rmod__": [4, 7], "__rmul__": [4, 7], "__ror__": [4, 7], "__rsub__": [4, 7], "__rtruediv__": [4, 7], "__slots__": [0, 7], "__str__": 18, "__sub__": [4, 7], "__truediv__": [4, 7], "__version__": 38, "_aggreg": 5, "_build_expandable_cel": 3, "_build_html_foot": 3, "_build_html_head": 3, 
"_build_regular_cel": 3, "_build_table_bodi": 3, "_build_table_container_start": 3, "_build_table_head": 3, "_convert_file_sort_ord": 1, "_convert_table_partition_col": 1, "_create_table_udf": [7, 18], "_create_table_udf_decor": [7, 18], "_create_window_udf": [7, 18], "_create_window_udf_decor": [7, 18], "_custom_cell_build": 3, "_custom_header_build": 3, "_default_formatt": 3, "_export_to_c_capsul": 2, "_format_cell_valu": 3, "_get_cell_valu": 3, "_get_default_css": 3, "_get_default_nam": [7, 18], "_get_javascript": 3, "_inner": [0, 7], "_intern": [0, 1, 2, 4, 7, 13, 14, 15, 16, 17], "_io_custom_table_provid": 31, "_is_pycapsul": 18, "_max_row": 3, "_normalize_input_typ": [7, 18], "_null_treat": 4, "_order_bi": 4, "_partition_bi": 4, "_r": [7, 18], "_raw_plan": [7, 14], "_raw_schema": 0, "_raw_write_opt": [2, 7], "_refresh_formatter_refer": 3, "_repr_html_": [2, 3, 37], "_sum": [7, 18, 31], "_to_pyarrow_typ": [4, 7], "_type_formatt": 3, "_typesh": [7, 18], "_udaf": [7, 18], "_udf": [7, 18], "_udtf": [7, 18], "_udwf": [7, 18], "_validate_bool": 3, "_validate_formatter_paramet": 3, "_validate_positive_int": 3, "_window_fram": 4, "_window_funct": 5, "a0": 26, "a_siz": 26, "ab": [4, 5, 7], "abc": [0, 1, 2, 3, 4, 7, 8, 18], "abi": 20, "abi3": 21, "abi_st": 20, "abil": [35, 46], "abl": [7, 14], "about": [16, 17, 20, 22, 33, 34, 36], "abov": [5, 20, 24, 29, 33, 36, 46], "absolut": [4, 5, 7], "abstract": [0, 7, 8, 18, 26, 31, 35, 36], "accept": [1, 2, 4, 5, 7, 36, 39], "access": [1, 3, 7, 11, 20, 26, 35, 36, 47], "access_key_id": 35, "account": 35, "accum": [7, 18], "accumul": [7, 18, 31], "accur": 34, "achiev": 22, "aco": [4, 5, 7], "acosh": [4, 5, 7], "acronym": 20, "across": [3, 18, 20, 26, 34, 37, 46], "act": [7, 14], "action": 36, "activ": [20, 21], "actual": [2, 34, 36], "ad": [2, 20, 35], "adapt": 20, "add": [0, 1, 2, 3, 4, 5, 20, 21, 36], "add_3": 2, "addit": [2, 3, 4, 5, 7, 16, 17, 18, 20, 21, 31, 34, 35, 36, 41, 47], "addition": [20, 23], "adhoc": 21, "adopt": 
20, "advanc": [0, 1, 2, 7, 11, 31, 35, 36, 37], "advantag": [20, 21, 22], "affect": [7, 13, 18, 24, 34, 37], "after": [1, 2, 3, 4, 5, 7, 18, 29, 31, 37], "ag": [26, 36], "against": [1, 2, 4, 5, 18, 20, 22, 32, 37], "age_col": 26, "age_in_year": 26, "agg": 2, "aggreg": [1, 2, 4, 5, 7, 18, 23, 28, 34, 36], "aggregatefunct": 4, "aggregateudf": [1, 7, 18], "aggregateudfexport": [7, 18], "agnost": 36, "aim": 38, "aiter": 2, "albert": 26, "algorithm": [2, 5], "alia": [0, 1, 2, 3, 4, 5, 7, 23, 24, 26, 27, 30, 31, 33, 36, 39], "alias": 21, "alic": 29, "align": [2, 37], "all": [0, 1, 2, 3, 4, 5, 7, 13, 18, 20, 21, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 41, 46], "alloc": [2, 34], "allow": [1, 2, 3, 7, 13, 15, 18, 20, 21, 22, 24, 26, 27, 31, 34, 35, 36, 37, 41, 46], "allow_single_file_parallel": [2, 7], "alpha": [31, 35], "alreadi": [2, 5, 7, 20], "also": [1, 2, 3, 7, 18, 20, 21, 22, 24, 26, 27, 31, 33, 36, 37, 38, 46], "altern": [30, 41, 44], "alternate_a": 2, "alwai": [18, 37], "amazons3": [12, 35], "ambigu": 29, "amount": [2, 18, 36], "an": [0, 1, 2, 3, 4, 5, 7, 11, 13, 14, 15, 16, 18, 20, 21, 23, 24, 25, 26, 29, 30, 31, 33, 34, 35, 36, 38, 39, 40, 41, 44, 46, 47], "analyt": 33, "analyz": [2, 4, 21], "ani": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 14, 18, 20, 24, 29, 31, 33, 35, 36, 39], "annot": 21, "anoth": [2, 5, 7, 11, 20, 46], "anti": [2, 28], "apach": [1, 2, 4, 5, 7, 20, 21, 22, 39], "apart": 22, "api": [1, 2, 4, 7, 14, 20, 21, 22, 27, 33, 34, 35, 36, 37, 46], "appear": [4, 5, 7, 20], "append": [2, 5, 7, 31, 35], "appli": [2, 3, 7, 15, 20, 37], "applic": [20, 34], "approach": [5, 24, 26, 31, 34, 35, 46], "appropri": [18, 21, 31, 35, 37], "approx_distinct": [5, 24], "approx_median": [5, 24], "approx_percentile_cont": [5, 24], "approx_percentile_cont_with_weight": [5, 24], "approxim": [5, 24], "ar": [1, 2, 3, 4, 5, 7, 11, 13, 14, 15, 18, 20, 21, 23, 24, 26, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 46], "arbitrari": [2, 5], "arc": [4, 5, 7, 20, 31, 45, 47], 
"architectur": 34, "area": 20, "arg": [1, 2, 5, 7, 16, 18, 21], "argument": [2, 4, 5, 7, 18, 24, 31], "arithmet": 36, "around": [5, 21, 35], "arr": 36, "arrai": [1, 2, 3, 4, 5, 7, 18, 24, 28, 31, 35, 36, 39], "array1": 5, "array2": 5, "array_agg": [5, 24], "array_append": [5, 21], "array_cat": [5, 26], "array_concat": [5, 26], "array_dim": [4, 5, 7], "array_distinct": [4, 5, 7], "array_el": [4, 5, 7, 26], "array_empti": [4, 5, 7, 26], "array_except": 5, "array_extract": 5, "array_ha": 5, "array_has_al": 5, "array_has_ani": 5, "array_indexof": 5, "array_intersect": 5, "array_join": 5, "array_length": [4, 5, 7], "array_ndim": [4, 5, 7], "array_pop_back": [4, 5, 7], "array_pop_front": [4, 5, 7], "array_posit": 5, "array_prepend": 5, "array_push_back": 5, "array_push_front": 5, "array_remov": 5, "array_remove_al": 5, "array_remove_n": 5, "array_repeat": [5, 26], "array_replac": 5, "array_replace_al": 5, "array_replace_n": 5, "array_res": 5, "array_slic": [4, 5, 7], "array_sort": 5, "array_to_str": 5, "array_union": 5, "arriv": 39, "arro3": [7, 18, 31], "arrow": [1, 2, 3, 4, 5, 7, 15, 18, 21, 22, 25, 35, 38, 42], "arrow_cast": [4, 5, 7, 27], "arrow_datafusion_python_root": 21, "arrow_t": 36, "arrow_typ": 20, "arrow_typeof": [4, 5, 7], "arrowarrai": [7, 15], "arrowarrayexport": 1, "arrowarraystream": 2, "arrowschema": [7, 15], "arrowstreamexport": 1, "arxiv": 5, "as_pi": [7, 18, 31], "ascend": [4, 5, 7, 24, 33, 36], "ascii": [4, 5, 7, 13], "asin": [4, 5, 7], "asinh": [4, 5, 7], "ask": 20, "assembl": 1, "assign": [4, 5, 7], "assist": 21, "associ": [0, 1, 4, 5, 7, 20], "assum": [2, 5, 7, 18, 21, 35], "assumpt": 22, "async": [2, 7, 15, 36], "asynchron": [7, 15, 36], "asyncio": 36, "asynciter": 2, "atan": [4, 5, 7], "atan2": 5, "atanh": [4, 5, 7], "atk": [22, 35, 38], "attach": [4, 5, 7, 34], "attack": [22, 24, 27, 33, 35, 38, 46], "attempt": [1, 2, 7, 15, 18, 20, 21, 31], "attr_nam": 47, "attribut": 20, "auto": 19, "autoapi": 19, "automat": [1, 2, 7, 34, 36, 37], "avail": 
[2, 3, 7, 24, 28, 31, 34, 35, 36, 45], "averag": [5, 18, 33], "avg": [5, 18, 24, 33], "avoid": [1, 7, 11, 20], "avro": [1, 7, 11, 35, 36, 37, 42], "await": 36, "awar": [2, 30], "aws_access_key_id": 35, "aws_secret_access_kei": 35, "b": [2, 5, 25, 26, 31, 32, 35, 36, 39], "back": [4, 7, 18, 20, 22, 31, 35, 37, 39], "background": 37, "backward": 3, "balanc": 37, "bar": 34, "base": [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 13, 18, 19, 26, 29, 31, 34, 35], "base64": 5, "baseinputsourc": [8, 9, 10], "basi": [2, 7, 18, 31], "basic": [7, 18, 23, 28, 30, 31, 34, 35], "basic_typ": 27, "batch": [1, 2, 3, 7, 14, 15, 18, 25, 31, 32, 35, 36, 37, 39], "batch_arrai": 31, "batch_siz": [1, 7], "becaus": [2, 4, 7, 20, 31, 35], "becca": 26, "beedril": [22, 33, 35, 38], "beedrillmega": [22, 33, 35, 38], "been": [1, 3, 20, 31], "befor": [3, 5, 21, 26, 31, 37], "beforehand": [1, 7], "begin": [1, 4, 5, 7, 13, 18, 37], "behavior": [5, 7, 13], "being": 2, "below": [23, 24, 35, 36], "benefit": [2, 7, 34], "best": [1, 2, 7, 18, 20, 31], "beta": 35, "better": [2, 7, 37], "between": [1, 4, 5, 7, 18, 20, 22, 23, 29, 31, 33, 34], "bia": [7, 18], "bias_10": [7, 18], "biased_numb": [7, 18], "biasednumb": [7, 18], "bin": [5, 21], "binari": [4, 5, 7, 14, 20, 47], "binaryexpr": [4, 7], "bind": [7, 18, 20, 21, 22, 35, 38], "bit": [4, 5, 7], "bit_and": [5, 24], "bit_length": [4, 5, 7], "bit_or": [5, 24], "bit_pack": [2, 7], "bit_xor": [5, 24], "bitwis": [5, 26], "blake2": 5, "blake2b": 5, "blake3": 5, "blastois": [22, 35, 38], "blastoisemega": [22, 35, 38], "blob": [4, 7], "blog": [21, 31], "bloom": [2, 7], "bloom_filter_en": [2, 7], "bloom_filter_fpp": [2, 7], "bloom_filter_ndv": [2, 7], "bloom_filter_on_writ": [2, 7], "blue": 26, "bob": 29, "bodi": 3, "bool": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 17, 18], "bool_": 31, "bool_and": [5, 24], "bool_or": [5, 24], "boolean": [3, 4, 5, 7, 28], "boost": [2, 7], "bootstrap": 21, "border": 37, "borrow": 20, "both": [2, 4, 5, 7, 18, 20, 21, 26, 29, 31, 36, 46], 
"bottleneck": 34, "bound": [4, 7, 18, 31, 33, 45, 47], "boundari": [18, 20, 33, 36], "bow": 33, "box": 20, "bracket": 26, "break": 20, "bronz": 5, "brotli": [2, 7], "btrim": [4, 5, 7], "bucket_nam": 35, "bug": [20, 22, 24, 33, 35, 38], "build": [3, 4, 5, 7, 20, 22, 23, 24, 33, 36, 47], "build_flag": 21, "build_tabl": [8, 9, 10], "builder": [3, 4, 5, 7, 13, 24, 33], "built": [0, 2, 7, 18, 20, 24, 26, 27, 31], "bulb": 27, "bulbafleur": 27, "bulbasaur": [22, 27, 33, 35, 38], "bulk": 20, "butterfre": [22, 33, 35, 38], "button": 3, "byte": [1, 2, 3, 4, 5, 7, 14, 16], "byte_stream_split": [2, 7], "bz2": [7, 13], "c": [1, 2, 7, 15, 18, 20, 21, 22, 25, 35, 36, 39], "cach": [2, 3, 7], "calcul": [2, 5, 18, 31], "call": [0, 2, 3, 4, 5, 7, 14, 15, 16, 17, 18, 20, 23, 24, 26, 31, 35, 36, 37], "call0": 47, "callabl": [2, 3, 7, 18], "caller": 20, "can": [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 14, 18, 20, 21, 23, 24, 26, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 45, 46, 47], "can_retir": 26, "candid": 4, "cannot": [1, 5, 7, 11, 46], "canon": 20, "canonical_nam": [4, 7], "capabl": [34, 46], "capit": [4, 5, 7, 30], "capsul": [2, 20, 47], "capsule_t": 45, "capsuletyp": [7, 18], "captur": 5, "cardin": [4, 5, 7, 26], "carefulli": 20, "cargo": 21, "carlo": 26, "cascad": [0, 7], "case": [2, 4, 5, 7, 18, 20, 24, 27, 30, 31, 33, 34, 35, 46], "case_build": 4, "casebuild": [4, 5], "cast": [2, 4, 5, 7, 28, 46], "castabl": [4, 7], "catalog": [1, 2, 7, 17, 19, 47], "catalog_list": 0, "catalog_nam": [0, 1], "cataloglist": [0, 1], "catalogprovid": [0, 1, 35, 46, 47], "catalogproviderexport": [0, 1], "catalogproviderlist": [0, 1], "catalogproviderlistexport": 1, "categori": [2, 27], "caterpi": [22, 33, 35, 38], "caus": [1, 7, 21, 33], "cbrt": [4, 5, 7], "cdatainterfac": [1, 2], "ceil": [4, 5, 7], "cell": [3, 37], "cellformatt": 3, "certain": 18, "certainli": 21, "chain": [2, 7, 13], "challeng": 20, "chang": [3, 18, 21, 26, 37], "chansei": 24, "char": 27, "char_length": [4, 5, 7, 27], "charact": [3, 
4, 5, 7, 13, 37, 41], "character_length": [4, 5, 7], "characterist": 34, "charizard": [22, 27, 33, 35, 38, 46], "charizardmega": [22, 27, 33, 35, 38, 46], "charli": 29, "charmand": [22, 27, 33, 35, 38], "charmeleon": [22, 27, 33, 35, 38], "check": [5, 26, 27], "checksum": [4, 5, 7], "child": 37, "children": [7, 14], "chr": [4, 5, 7], "chrono": 5, "chunk": [2, 7], "chunkedarrai": 2, "ci": [20, 21], "citycab": 29, "class": [21, 31, 35, 37], "classmethod": [1, 2, 3], "classvar": [4, 7], "claus": [18, 33], "clean": 21, "clefabl": 33, "clefairi": [24, 33], "click": 3, "clone": [20, 21, 31, 45, 47], "close": 33, "cloud": 34, "co": [4, 5, 7], "coalesc": [2, 5, 27, 29], "coalesce_duplicate_kei": [2, 29], "code": [3, 4, 5, 7, 20, 27, 35], "codebas": 21, "codec": [1, 2, 7, 47], "coeffici": 5, "coerc": 5, "coercion": 2, "col": [2, 3, 4, 5, 7, 18, 23, 24, 26, 27, 30, 31, 32, 33, 34, 36, 39, 46], "col1": [18, 36], "col2": 36, "col_attack": 24, "col_diff": 31, "col_idx": 3, "col_spe": 24, "col_type_1": 24, "col_type_2": 24, "collaps": [3, 37], "collect": [1, 2, 3, 4, 7, 18, 22, 23, 25, 32, 34, 36, 46], "collect_column": [2, 36], "collect_partit": 2, "collid": [2, 7], "color": [26, 37], "column": [1, 2, 3, 4, 5, 7, 11, 13, 15, 18, 23, 24, 25, 27, 28, 29, 31, 33, 34, 41, 46], "column_a": 4, "column_index_truncate_length": [2, 7], "column_nam": [2, 4, 7, 13], "column_specific_opt": [2, 7], "com": [4, 5, 7, 21], "combin": [1, 2, 5, 7, 11, 26, 29, 30, 31, 34], "come": [35, 43], "command": [1, 7, 21, 35], "comment": [7, 13, 20], "common": [4, 5, 7, 8, 9, 10, 20, 24, 27, 29, 33, 35, 39], "commun": [20, 21], "compar": [5, 33], "comparison": [4, 7, 24, 36, 46], "compat": [3, 20, 26, 36], "compel": 20, "compet": 34, "compil": [20, 26], "complet": [4, 5, 7, 8, 20, 21, 31, 35, 36, 37, 45, 47], "complex": [2, 22, 27, 34], "complic": 2, "compon": 5, "compos": 5, "composit": 3, "comprehens": 37, "compress": [1, 2, 7, 11, 13], "compression_level": [2, 7], "comput": [2, 4, 5, 7, 18, 20, 26, 31, 
36], "concat": 5, "concat_w": 5, "concaten": [5, 26], "concatenated_arrai": 26, "concept": [1, 2, 4, 7, 26, 34, 46], "concis": 21, "concurr": [1, 7, 34, 36], "condit": 28, "config": [1, 7, 20, 21, 34], "config_intern": [1, 7], "config_opt": [1, 7], "configopt": 20, "configur": [1, 3, 7, 13, 20, 21, 46], "configure_formatt": [3, 7, 37], "conflict": [1, 7, 11], "conjunct": 2, "connect": [1, 23], "consecut": 5, "consequ": 46, "consid": [5, 46], "consider": 37, "consist": [4, 5, 37], "consol": [2, 36], "constraint": 3, "construct": [2, 4, 7, 18, 31, 36, 45], "constructor": [0, 2, 4, 7, 14, 15, 16, 17, 18], "consum": [8, 16, 36, 39], "contain": [2, 3, 4, 5, 7, 13, 14, 18, 19, 20, 21, 24, 26, 29, 31, 37, 41], "content": [21, 28, 37], "context": [0, 3, 5, 7, 11, 14, 16, 19, 20, 31, 32, 34, 37, 39, 46], "continu": [5, 20], "contrast": 29, "contribut": [21, 47], "contributor": 20, "control": [1, 2, 5, 7, 24, 31, 33, 34, 36, 41], "conveni": [7, 18, 35], "convent": [4, 7, 20], "convers": [7, 18, 20, 21, 23, 36, 46], "convert": [0, 1, 2, 4, 5, 7, 13, 14, 15, 16, 17, 18, 20, 23, 25, 26, 27, 31, 32, 36, 46], "copi": [7, 15, 20, 21, 22, 31, 39], "copyto": 4, "core": [2, 4, 7, 20, 34, 35, 47], "corr": [5, 24], "correctli": [2, 18, 20], "correl": 5, "correspond": [5, 29], "cosh": [4, 5, 7], "cosin": [4, 5, 7], "cost": [22, 31], "costli": 18, "cot": [4, 5, 7], "cotang": [4, 5, 7], "could": [2, 4, 7, 20, 35], "count": [2, 5, 7, 24, 25, 34, 36], "count_star": 5, "counterpart": [5, 20], "coupl": [20, 33], "covar": 5, "covar_pop": [5, 24], "covar_samp": [5, 24], "covari": 5, "cover": [23, 27, 34], "cpython": [18, 21], "cr": [31, 45, 47], "crate": [20, 47], "creat": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 26, 31, 32, 34, 39, 46], "create_datafram": [1, 31, 35], "create_dataframe_from_logical_plan": 1, "create_namespace_if_not_exist": 35, "create_t": 35, "createcatalog": 4, "createcatalogschema": 4, "created_bi": [2, 7], "createexternalt": 4, 
"createfunct": 4, "createfunctionbodi": 4, "createindex": 4, "creatememoryt": 4, "createview": 4, "creation": 36, "credenti": 35, "criteria": [5, 33], "crlf": [7, 13], "css": [3, 37], "cstream": 20, "cstring": 20, "csv": [0, 1, 2, 7, 11, 13, 20, 22, 24, 27, 33, 34, 35, 36, 37, 38, 42, 46], "csvreadopt": [1, 7, 11, 13, 41], "ctx": [0, 1, 2, 5, 7, 14, 16, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 43, 44, 45, 46], "cube": [4, 5, 7], "cubon": 24, "cume_dist": [5, 18, 33], "cumul": 5, "curr_valu": 31, "current": [0, 2, 3, 4, 5, 7, 14, 18, 20, 27, 31, 33, 37], "current_d": 5, "current_tim": 5, "custom": [1, 3, 7, 8, 14, 20, 29, 31, 34, 36, 42, 46], "custom_css": [3, 37], "custom_formatt": 3, "custom_html": 37, "customer_id": 29, "d": [2, 18, 39], "dai": [5, 27], "dant": 26, "dark": [24, 37], "data": [0, 1, 2, 3, 4, 5, 7, 11, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 43, 45, 46], "data_page_row_count_limit": [2, 7], "data_pagesize_limit": [2, 7], "data_typ": [5, 7, 13, 20], "data_type_or_field_to_field": 18, "data_types_or_fields_to_field_list": 18, "databas": [0, 7, 26, 34], "dataflow": [7, 14], "datafram": [0, 1, 3, 4, 5, 6, 7, 11, 14, 15, 18, 19, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 46], "dataframe_formatt": [7, 19], "dataframehtmlformatt": 3, "dataframewriteopt": [2, 7], "datafus": [19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46], "datafusion_catalog": 47, "datafusion_catalog_provid": 47, "datafusion_df": 35, "datafusion_logical_extension_codec": 47, "datafusion_sql": 17, "datafusion_table_funct": 31, "datafusion_table_provid": [20, 45], "dataset": [0, 1, 3, 7, 27, 33, 34, 35, 37, 38], "datasourc": 8, "datastructur": [4, 7], "datatyp": [1, 2, 4, 7, 11, 13, 18, 31], "datatypemap": [4, 7, 20], "date": [5, 27, 36], "date32": 5, "date_bin": 5, "date_part": [5, 27], "date_trunc": 5, "datepart": 5, 
"datetrunc": 5, "ddd": 37, "ddl": [1, 7], "dealloc": 4, "debug": 20, "decid": [18, 20], "decim": 5, "decimal_plac": 5, "decod": 5, "decor": [7, 18], "def": [2, 7, 18, 22, 31, 35, 36, 37, 38, 46], "default": [0, 1, 2, 3, 4, 5, 7, 10, 11, 13, 17, 18, 20, 21, 29, 31, 33, 34, 35, 37, 46], "default_max_infer_schema": [1, 7, 13], "default_str_repr": 2, "default_valu": 5, "defaultstyleprovid": 3, "defens": [22, 27, 35, 38, 46], "defin": [0, 1, 2, 4, 7, 18, 20, 21, 27, 28, 33], "definit": [1, 4, 7, 18, 20, 23, 31], "degre": [4, 5, 7], "delet": [1, 7], "delimit": [1, 5, 7, 11, 13, 41], "delta": 20, "delta_binary_pack": [2, 7], "delta_byte_arrai": [2, 7], "delta_length_byte_arrai": [2, 7], "delta_t": 35, "deltalak": 35, "deltat": 35, "demand": [36, 39], "demonstr": [1, 31, 33, 34, 38, 39], "dens": 5, "dense_rank": [5, 18, 33], "depend": [5, 20, 24, 31, 33, 47], "deprec": [1, 2, 3, 5, 6, 33], "deprecationwarn": 3, "deregist": [0, 7], "deregister_schema": [0, 7], "deregister_t": [0, 1], "deriv": 20, "descend": 5, "describ": [2, 5, 20, 24, 25, 31, 35], "describet": 4, "descript": [1, 5, 7, 18], "deseri": 16, "deserialize_byt": 16, "design": [20, 28], "desir": 5, "detail": [2, 4, 5, 7, 18, 21, 23, 25, 36, 41], "determin": [0, 2, 4, 5, 7, 18, 31], "dev": 21, "develop": [20, 35], "deviat": 5, "df": [1, 2, 4, 5, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46], "df1": 36, "df2": 36, "df_filter": 32, "df_view": 32, "dfschema": 7, "dialect": [17, 46], "dict": [1, 2, 3, 4, 5, 7], "dictionari": [1, 2, 5, 7, 26, 32, 35, 36, 46], "dictionary_en": [2, 7], "dictionary_page_size_limit": [2, 7], "differ": [2, 4, 5, 7, 13, 14, 18, 20, 27, 31, 34, 35, 36, 37], "difficult": 20, "digest": 5, "dimens": [4, 5, 7], "dimension": 2, "directli": [3, 4, 5, 7, 16, 18, 20, 35, 36, 39, 47], "directori": [1, 7, 21], "disabl": [1, 7, 29, 37], "discard": 2, "discuss": [20, 35], "disk": [1, 7, 9, 10], "displai": [3, 7, 14, 23, 25, 32, 36, 38], "display_graphviz": [7, 14], 
"display_ind": [7, 14], "display_indent_schema": [7, 14], "display_nam": [4, 7], "distanc": 5, "distinct": [2, 4, 5, 7, 35], "distinctli": 20, "distinguish": 20, "distribut": [5, 34], "div": 37, "divid": [5, 34], "divis": [4, 5, 7], "divisor": 5, "dml": [1, 7], "dmlstatement": 4, "do": [0, 1, 2, 7, 18, 20, 21, 23, 24, 31, 33, 35, 45, 46], "doc": [1, 2, 5, 17], "docstr": 21, "document": [1, 2, 4, 5, 7, 18, 19, 20, 21, 23, 34, 36, 38, 41], "doe": [2, 3, 5, 7, 13, 18, 20, 21, 33, 35], "doesn": 18, "dominant_typ": 27, "done": [2, 24, 31, 35], "dot": [7, 14], "doubl": [2, 30, 39], "double_func": [7, 18], "double_it": [7, 18], "double_udf": [7, 18], "down": [20, 33, 35], "downcast": [20, 47], "download": [23, 30, 38], "downstream": 39, "dragon": [22, 24, 27, 33, 35, 38], "dragonair": 33, "dratini": 33, "drop": [1, 2, 7, 36], "dropcatalogschema": 4, "dropfunct": 4, "droptabl": 4, "dropview": 4, "dtype": [2, 37], "duckdb": 17, "due": [1, 7, 11, 21, 47], "duplic": [2, 4, 5, 7, 28, 37], "dure": [7, 18, 21], "dyn": 47, "dynamic_lookup": 21, "e": [4, 5, 7, 18, 31], "each": [1, 2, 4, 5, 7, 18, 20, 21, 24, 27, 33, 36, 37], "eagerli": 36, "earli": 20, "easi": [20, 43], "easier": [20, 27, 38], "easili": [4, 7, 20], "east": 35, "effect": [30, 38], "effici": 34, "effort": [2, 7, 20], "either": [1, 2, 4, 7, 18, 20, 24, 35, 36, 46], "electr": [24, 33], "element": [2, 3, 4, 5, 7, 26], "ellipsi": [2, 7, 18], "els": [31, 47], "else_expr": 4, "embed": [2, 7], "employe": 26, "empti": [1, 4, 5, 7, 13, 14, 24, 26, 36], "empty_t": 1, "emptyrel": 4, "enabl": [1, 2, 7, 20, 34, 36, 37], "enable_cell_expans": [3, 7, 37], "enable_url_t": 1, "encod": [2, 5, 7, 16], "encount": 20, "encourag": 21, "end": [0, 2, 4, 5, 7, 14, 15, 17, 18, 33, 34], "end_bound": [4, 7], "end_posit": 5, "ends_with": 5, "engin": [1, 7, 22, 23], "enough": 18, "ensur": [3, 7, 8, 13, 20, 34], "ensure_expr": 4, "ensure_expr_list": 4, "entir": [2, 5, 7, 18, 24, 31, 33, 36], "entri": [2, 5, 24, 33, 36], "enum": [2, 4, 7, 18], 
"enumer": 18, "environ": [21, 22, 34, 36, 37], "equal": [2, 4, 5, 7, 14], "equival": [2, 5, 18, 20, 24, 33, 36], "error": [1, 3, 4, 7, 13, 21], "escap": [7, 13, 41], "especi": 21, "essenti": [7, 15, 25], "etc": [1, 3, 4, 7, 9, 10, 18], "eval_rang": 18, "evalu": [2, 4, 5, 7, 18, 23, 24, 26, 31, 33, 34, 36], "evaluate_al": [7, 18, 31], "evaluate_all_with_rank": [18, 31], "even": [1, 3, 7, 20, 29, 37], "evenli": 34, "event": 36, "everi": [5, 7, 18], "everyth": [9, 10], "ex": [4, 7, 30], "exact": 20, "exactli": [2, 33], "examin": [4, 7], "exampl": [1, 2, 3, 4, 5, 7, 14, 18, 20, 23, 24, 26, 27, 29, 31, 32, 33, 35, 36, 38, 45, 46, 47], "exce": 3, "excel": 20, "except": [2, 4, 5, 7, 26, 46], "except_al": 2, "exclud": [2, 29], "execut": [1, 2, 4, 7, 13, 18, 20, 21, 22, 23, 34, 39], "execute_stream": [2, 7, 15, 36], "execute_stream_partit": [2, 36], "execution_plan": 2, "executionplan": [1, 2, 7, 14], "exeggcut": 24, "exist": [0, 1, 2, 3, 4, 5, 7, 20, 36], "exp": [4, 5, 7], "exp_smooth": 31, "expand": [2, 3, 37], "expans": [3, 37], "expect": [4, 5, 7, 13, 18, 20, 26, 31, 33, 36, 46], "expens": 2, "experi": 21, "explain": [2, 4], "explan": [2, 20, 23], "explicit": [4, 34, 36, 37], "explicitli": [7, 18, 20, 36], "expon": 5, "exponenti": [4, 5, 7], "exponentialsmooth": 31, "export": [1, 2, 7, 15, 18, 20, 21, 35, 42], "expos": [2, 20, 31, 35, 36, 45], "expr": [1, 2, 5, 7, 11, 13, 18, 19, 31, 33, 36], "expr1": 5, "expr2": 5, "expr_type_error": 4, "express": [1, 2, 4, 5, 7, 13, 18, 24, 27, 28, 31, 33, 47], "exprfuncbuild": [4, 7], "extend": [5, 8, 20], "extens": [1, 3, 4, 7, 11, 13, 37, 41, 47], "extern": [0, 7], "extract": [3, 4, 5, 7, 27, 47], "extraenv": 21, "f": [4, 23, 24, 26, 27, 33, 35, 36, 46], "f2f2f2": 37, "face": 20, "fact": [20, 31], "factor": 34, "factori": [4, 5, 7, 18], "fail": [2, 21, 27], "fair": [1, 7], "fairi": [24, 33], "fals": [2, 3, 4, 5, 7, 13, 18, 20, 21, 22, 26, 27, 29, 30, 31, 34, 35, 36, 37, 38, 45], "familiar": 21, "far": 2, "faster": [2, 5, 7, 18, 
34], "featur": [2, 5, 7, 20, 21, 29, 35, 39], "fetch": 21, "few": [20, 21, 23], "fewer": 3, "ffi": [0, 1, 7, 18, 35, 47], "ffi_": 20, "ffi_catalogprovid": 47, "ffi_logical_codec_from_pycapsul": 47, "ffi_logicalextensioncodec": [1, 47], "ffi_provid": 20, "ffi_tablefunct": 31, "ffi_tableprovid": [20, 45], "ffi_taskcontextprovid": 1, "field": [2, 3, 5, 7, 13, 18, 20, 27, 31], "fight": [24, 33], "file": [1, 2, 7, 9, 10, 11, 13, 16, 20, 21, 22, 23, 30, 34, 36, 37, 38, 40, 41, 43, 44], "file_compression_typ": [1, 7, 11, 13], "file_extens": [1, 7, 11, 13], "file_partition_col": [1, 7, 11], "file_sort_ord": [1, 7, 11, 13], "filenam": 21, "filetyp": 4, "fill": [2, 4, 5, 7, 13, 27, 33], "fill_nan": [4, 7], "fill_nul": [2, 4, 7, 28], "filter": [2, 4, 5, 7, 13, 14, 23, 26, 32, 33, 35, 36], "final": [23, 31], "find": [5, 20, 21, 23, 24, 33], "find_in_set": 5, "finer": 36, "finish": [4, 24], "fire": [22, 24, 27, 35, 38], "first": [1, 2, 4, 5, 7, 20, 21, 23, 24, 26, 33, 35, 36, 38], "first_1": 24, "first_2": 24, "first_arrai": 5, "first_nam": 36, "first_valu": [5, 18, 24], "fix": [0, 18], "flag": [3, 5, 17, 18, 21], "flat": 4, "flatten": [4, 5, 7], "fleur": 27, "flexibl": 36, "float": [2, 5, 7, 18, 31, 37, 46], "float64": [7, 18, 27, 31], "floor": [4, 5, 7], "flow": [7, 14], "flower": 27, "fly": [22, 24, 33, 35, 38], "fn": [31, 45, 47], "focus": 20, "folder": [20, 31, 35, 45], "follow": [0, 1, 2, 4, 5, 7, 18, 20, 21, 23, 24, 26, 27, 29, 31, 33, 35, 38, 46], "foo": 34, "footer": 3, "fora": [4, 7], "foreign": [20, 47], "foreign_provid": 20, "foreigntableprovid": 20, "form": [2, 5, 7, 14, 24, 35], "format": [1, 2, 3, 4, 5, 6, 7, 13, 14, 23, 34, 35, 36, 40, 43, 46], "format_argu": 5, "format_html": [3, 37], "format_str": 3, "formatt": [2, 3, 5, 7], "formatted_valu": 3, "formatter_class": 37, "formattermanag": 3, "formatting_context": 37, "forth": 39, "found": [2, 5, 21, 33, 41, 45, 47], "four": 31, "frame": [2, 4, 5, 7, 18, 25, 31], "frame_bound": 4, "framework": 21, "free": [1, 7], 
"frequent": [20, 21], "fresh": 37, "from": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 16, 18, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47], "from_arrai": [31, 35, 36], "from_arrow": [1, 35, 36, 39], "from_arrow_t": 1, "from_dataset": [0, 7], "from_json": 16, "from_panda": [1, 36], "from_polar": [1, 35], "from_proto": [7, 14], "from_pycapsul": [7, 18], "from_pydict": [1, 25, 26, 29, 31, 32, 35, 39], "from_pylist": [1, 29, 35], "from_str": 2, "from_stream": 36, "from_substrait_plan": 16, "from_unixtim": [4, 5, 7], "from_val": 5, "frozen": [20, 21], "full": [2, 5, 28, 36, 37, 46], "full_nam": 36, "fulli": [21, 34], "func": [1, 2, 7, 18, 31], "function": [1, 2, 12, 15, 19, 20, 23, 25, 28, 34, 35, 37, 44, 47], "function_to_impl": [18, 31], "further": [5, 20], "futur": [18, 20, 33], "g": [5, 18], "gamma": 35, "gather": 36, "gcd": 5, "gener": [4, 7, 14, 15, 16, 18, 19, 20, 21, 22, 35, 36, 38, 46], "geodud": 33, "get": [2, 3, 7, 14, 16, 18, 20, 21, 26, 29, 35, 36, 37, 46, 47], "get_cell_styl": 3, "get_default_level": 2, "get_formatt": [3, 37], "get_frame_unit": [4, 7], "get_header_styl": 3, "get_lower_bound": [4, 7], "get_offset": 4, "get_rang": 18, "get_table_styl": 37, "get_upper_bound": [4, 7], "get_value_styl": 37, "getattr": 47, "getenv": 35, "ghost": 24, "gil": 22, "git": 21, "github": [4, 5, 7, 21], "give": [5, 24, 30, 38], "given": [0, 1, 3, 4, 5, 7, 18, 31], "glanc": 20, "global": [1, 2, 3, 7, 11, 36, 37], "global_ctx": 1, "go": [20, 25, 27], "goe": 20, "gold": 5, "good": [20, 21], "googlecloud": [12, 35], "graph": [7, 14], "graphic": [7, 14], "graphviz": [7, 14], "grass": [22, 24, 27, 33, 35, 38], "great": 21, "greater": [4, 5, 7], "greatest": 5, "greatli": [31, 34], "greedi": [1, 7], "green": 26, "grimer": 33, "ground": 24, "group": [1, 2, 4, 5, 7, 11, 18, 23, 24, 25, 33, 36], "group_bi": [2, 24], "groupingset": 4, "guarante": [2, 7, 20], "guid": [1, 2, 28, 34, 36, 37, 38], "guidanc": 36, "gz": 41, "gzip": 
[2, 7, 13, 41], "ha": [1, 2, 3, 5, 7, 13, 18, 20, 26, 30, 31, 35, 46], "handl": [3, 15, 24, 28, 33, 34, 46], "happen": 21, "hardwar": 34, "has_head": [1, 7, 11, 13], "has_mor": [2, 3, 37], "hasattr": 47, "hash": [2, 4, 5, 7, 34], "haskel": 25, "have": [1, 2, 5, 7, 11, 13, 16, 18, 20, 21, 22, 24, 29, 31, 33, 34, 35, 41, 45, 46], "head": 2, "header": [1, 2, 3, 7, 11, 13, 41], "healthi": 21, "heavy_red_unit": 26, "height": [3, 37], "help": [1, 3, 7, 11, 20, 21, 26, 27, 34, 37], "helper": [2, 4, 7, 18, 20, 47], "henc": 18, "here": [2, 5, 7, 20, 27, 30, 31, 33, 34, 37, 38, 39, 46], "hex": 5, "hexadecim": [4, 5, 7], "hierarch": 35, "high": [4, 7], "higher": [2, 4, 7, 34], "highli": 31, "highlight": 38, "hint": [1, 18, 21], "hive": 46, "homebrew": 21, "hood": 35, "host": 1, "how": [1, 2, 4, 5, 7, 18, 20, 24, 25, 28, 29, 31, 33, 34, 35, 36, 37, 38, 41, 46], "howev": [18, 31], "hp": [22, 35, 38], "html": [1, 2, 3, 5, 7, 17], "html_formatt": [3, 7, 19, 37], "http": [1, 2, 4, 5, 7, 12, 14, 16, 17, 35], "hyperbol": [4, 5, 7], "i": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 26, 27, 29, 31, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47], "ic": 24, "iceberg_t": 35, "id": [1, 2, 3, 21, 29, 36], "id_for_stud": 2, "ideal": 20, "ident": [2, 29, 35], "identifi": [1, 4, 7, 20, 30, 34, 35], "idl": [2, 7], "idx": [18, 31], "ignor": [5, 7, 13, 18, 21, 24, 33], "ignore_nul": [24, 33], "ilik": 4, "illustr": 20, "immut": [7, 18, 20, 31], "impact": [2, 34], "impl": [31, 45, 47], "implement": [0, 1, 2, 3, 7, 18, 21, 31, 35, 36, 37, 39, 45, 46, 47], "import": [1, 2, 3, 4, 7, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46], "improv": [1, 3, 7, 18, 31, 34, 35, 37, 47], "in_list": [5, 27], "includ": [1, 2, 3, 4, 7, 18, 20, 23, 24, 27, 29, 31, 34, 36, 37, 47], "include_rank": [18, 31], "inclus": [1, 4, 5, 7], "incomplet": 41, "incorpor": 31, "increas": [1, 2, 3, 7, 24, 34], "increment": [2, 18, 20, 31, 39], "incur": 
31, "indent": [7, 14], "independ": [5, 33], "index": [2, 3, 4, 5, 7, 17, 18, 26], "indic": [3, 4, 5, 7, 26], "individu": [2, 7, 26, 33], "infer": [1, 7, 11, 13], "inform": [1, 2, 4, 7, 8, 16, 17, 18, 20, 31, 34, 36], "information_schema": [1, 7], "infrastructur": 34, "inherit": 35, "init": 21, "initcap": [4, 5, 7], "initi": [2, 3, 4, 5, 7, 13], "inlin": [18, 20], "inlist": 4, "inner": [2, 20, 28, 36, 47], "input": [1, 2, 4, 5, 7, 11, 13, 14, 18, 19, 26, 31], "input_field": [7, 18], "input_item": [8, 9, 10], "input_typ": [7, 18], "inputsourc": 8, "insert": [1, 2, 7], "insert_oper": [2, 7], "insertop": [2, 7], "insight": 34, "instanc": [1, 2, 3, 4, 5, 7, 18, 20, 23], "instanti": [2, 7, 18], "instead": [1, 3, 4, 5, 7, 20, 31, 34, 35, 36, 41, 47], "insubqueri": 4, "insuffici": 35, "int": [1, 2, 3, 4, 5, 7, 11, 13, 14, 18, 31, 46], "int32": [7, 18, 27], "int64": [7, 18, 25, 27, 31], "integ": [3, 4, 5, 7, 18, 26, 31], "integr": [2, 20, 35, 36, 38, 45], "intend": [7, 13, 20], "intens": 34, "interact": [3, 20, 26, 35], "intercept": 5, "interchang": [7, 15, 43], "interest": 26, "interfac": [0, 1, 2, 5, 7, 15, 20, 22, 23, 31, 33, 35, 36, 39, 45, 47], "interior": 20, "intermedi": [7, 18], "intern": [0, 4, 7, 13, 20, 31], "intersect": [2, 5], "interv": 5, "into_view": 2, "intro": 21, "introduc": [21, 23, 34, 46], "intuit": 20, "invalid": [3, 7], "invers": [4, 5, 7], "invoc": 18, "invok": 39, "io": [1, 2, 7, 16, 19, 37], "io_avro": 36, "io_csv": 36, "io_json": 36, "io_parquet": 36, "ipc": 2, "is_caus": 18, "is_correct_input": [8, 9, 10], "is_current_row": 4, "is_empti": 26, "is_follow": 4, "is_not_nul": [4, 7, 24], "is_nul": [4, 7, 31], "is_null_arr": 31, "is_preced": 4, "is_unbound": 4, "isfals": 4, "isnan": [4, 5, 7], "isnotfals": 4, "isnotnul": [4, 7], "isnottru": 4, "isnotunknown": 4, "isnul": 4, "issu": [3, 5, 21, 33, 37], "istru": 4, "isunknown": 4, "iszero": [4, 5, 7], "item": 4, "iter": [2, 4, 7, 15, 36], "its": [2, 5, 7, 14, 18, 20, 22, 33, 37], "itself": 26, "ivi": 
27, "ivyfleur": 27, "ivysaur": [22, 27, 33, 35, 38], "java": 25, "javascript": [3, 37, 43], "jigglypuff": 33, "join": [1, 2, 4, 7, 14, 28, 34, 36], "join_kei": [2, 29], "join_on": [2, 36], "joinconstraint": 4, "jointyp": 4, "json": [1, 2, 7, 11, 16, 35, 36, 37, 42], "jupyt": [3, 36, 38], "jupyterlab": 38, "just": [2, 37], "justif": 20, "jynx": 24, "kakuna": [22, 33, 35, 38], "keep": [20, 21, 32, 35, 37], "kei": [1, 2, 3, 4, 7, 20, 23, 26, 28, 34, 36], "kept": 2, "kind": [0, 7, 20, 21], "know": [1, 7], "known": 26, "kv_meta": [2, 7], "kwarg": [3, 7, 8, 9, 10], "l179": [4, 7], "lab": 38, "label": 2, "lack": 22, "lag": [5, 18, 33], "lambda": [7, 18], "languag": [1, 7, 14, 20], "larg": [2, 3, 7, 27, 34, 37], "large_trip_dist": 30, "larger": [2, 7], "last": [4, 5, 7, 33], "last_nam": 36, "last_valu": [5, 24, 33], "last_with_nul": 33, "last_wo_nul": 33, "latenc": 34, "later": [35, 38, 45], "latest": [5, 17, 20], "latter": 1, "lazi": [23, 36], "lazili": [2, 36, 39], "lcm": 5, "lead": [2, 5, 18, 20, 31, 33, 35], "leaf": [7, 14], "leak": 22, "learn": [20, 25, 33], "least": [5, 18, 24, 29, 31, 37], "leav": [7, 14, 20], "left": [2, 5, 27, 28, 37], "left_on": [2, 29], "leftmost": 5, "legendari": [22, 35, 38], "len": [18, 27], "length": [2, 3, 4, 5, 7, 13, 18], "less": [4, 5, 7, 13], "lesson": 20, "let": [20, 31, 34, 39, 45, 47], "letter": [4, 5, 7, 30], "level": [2, 4, 5, 7, 20, 35], "levenshtein": 5, "leverag": [2, 7, 20], "lib": 21, "lib_dir": 21, "lib_nam": 21, "librari": [7, 8, 15, 20, 22, 38, 39, 45, 47], "lieu": [7, 18], "lightweight": 43, "like": [1, 2, 3, 4, 5, 7, 8, 20, 21, 22, 27, 31, 35, 36, 46, 47], "limit": [2, 3, 4, 5, 21, 23, 25, 27, 34, 36, 37, 46], "line": [1, 2, 7, 11, 13, 14, 20, 41], "linear": [5, 24], "link": [21, 33], "lint": 21, "linter": 21, "list": [0, 1, 2, 3, 4, 5, 7, 11, 13, 14, 18, 24, 31, 32, 33, 35, 36], "list_": 31, "list_append": [5, 21], "list_cat": 5, "list_concat": 5, "list_dim": [4, 5, 7], "list_distinct": [4, 5, 7], "list_el": 5, 
"list_except": 5, "list_extract": 5, "list_indexof": 5, "list_intersect": 5, "list_join": 5, "list_length": [4, 5, 7], "list_ndim": [4, 5, 7], "list_posit": 5, "list_prepend": 5, "list_push_back": 5, "list_push_front": 5, "list_remov": 5, "list_remove_al": 5, "list_remove_n": 5, "list_repeat": 5, "list_replac": 5, "list_replace_al": 5, "list_replace_n": 5, "list_res": 5, "list_slic": 5, "list_sort": 5, "list_to_str": 5, "list_union": 5, "lit": [2, 4, 5, 7, 21, 23, 24, 26, 30, 33, 36, 39], "liter": [2, 4, 7, 27, 28, 30, 31, 32, 36], "literal_with_metadata": [4, 7], "ll": [27, 29, 33], "ln": [4, 5, 7], "load": [3, 34, 35], "load_catalog": 35, "local": [1, 20, 21, 34], "localfilesystem": [12, 35], "locat": [1, 9, 19], "locationinputplugin": [9, 10], "lock": 22, "log": [5, 27], "log10": [4, 5, 7], "log2": [4, 5, 7], "logarithm": [4, 5, 7], "logic": [1, 2, 4, 7, 14, 16, 17, 23, 24, 30, 36, 37, 47], "logical_plan": [2, 16], "logicalextensioncodec": 47, "logicalplan": [1, 2, 4, 7, 14, 16, 17], "lonely_trip": 30, "long": [3, 20], "long_tim": 26, "longer": [5, 37, 47], "look": [20, 21, 34], "loop": 36, "loss": 46, "low": [4, 7, 24], "low_passenger_count": 30, "lower": [4, 5, 7, 18, 27, 30], "lowercas": [2, 4, 5, 7], "lowest": [20, 24], "lpad": 5, "ltrim": [4, 5, 7], "lz4": [2, 7], "lz4_raw": [2, 7], "lzo": [2, 7], "m": [21, 31], "mac": 21, "machin": 21, "machop": 33, "made": 30, "magikarp": 24, "magnemit": 33, "mai": [1, 2, 5, 7, 11, 13, 18, 20, 31, 34, 35, 37, 38, 46], "main": [1, 23, 36], "maintain": [1, 2, 20, 23, 46], "major": [21, 22, 47], "make": [5, 20, 21, 22, 27], "make_arrai": 5, "make_d": 5, "make_list": 5, "manag": [1, 3, 7, 21, 34], "mani": [3, 20, 24, 31, 34, 36, 37], "manipul": [1, 7, 27, 36], "mankei": 33, "manner": 20, "manual": [21, 34], "map": [2, 46], "match": [2, 4, 5, 7, 13, 27, 29, 34], "materi": [2, 36, 39], "math": 24, "mathemat": [28, 30, 36], "maturin": 21, "max": [5, 24, 25], "max_cell_length": [3, 7, 37], "max_cpu_usag": 34, "max_height": [3, 7, 
37], "max_memory_byt": [3, 37], "max_row": [3, 37], "max_row_group_s": [2, 7], "max_width": [3, 7, 37], "maximum": [1, 2, 3, 5, 7, 11, 13, 37], "maximum_buffered_record_batches_per_stream": [2, 7], "maximum_parallel_row_group_writ": [2, 7], "md5": [4, 5, 7], "mean": [3, 5, 20, 21, 25], "meaning": [5, 30], "meant": [7, 14], "measur": 34, "medal": 5, "median": [5, 24, 25], "member": 21, "memoiz": 18, "memori": [0, 1, 2, 3, 7, 14, 18, 22, 34, 39], "memory_catalog": [0, 7, 35], "memory_schema": [0, 35], "memtabl": 47, "mention": 26, "merg": [7, 18, 29, 31], "messag": [3, 37], "metadata": [1, 2, 4, 5, 7, 11, 18, 31], "metapod": [22, 24, 33, 35, 38], "method": [0, 1, 2, 3, 5, 7, 11, 13, 18, 20, 23, 25, 27, 29, 31, 32, 36, 37, 47], "metric": 2, "metrorid": 29, "microsecond": 5, "microsoftazur": [12, 35], "might": [3, 18, 24, 37], "millisecond": 5, "min": [5, 24, 25], "min_row": [3, 37], "minimum": [1, 2, 3, 5, 7, 37], "miss": [2, 7, 13, 21, 28], "mode": [2, 7], "model": 2, "modifi": [2, 7, 23], "modify_df": 2, "modul": [6, 20, 33, 36, 37], "modulo": [4, 7], "moment": [2, 20], "monitor": 34, "month": [5, 27], "more": [1, 2, 3, 4, 7, 16, 18, 20, 21, 23, 24, 27, 31, 34, 36], "most": [18, 20, 26, 33, 38], "mostli": 21, "much": [18, 21, 37], "multi": 22, "multipl": [1, 2, 3, 4, 5, 7, 18, 24, 27, 29, 31, 33, 34, 35, 36, 37], "must": [1, 2, 3, 4, 5, 7, 13, 18, 20, 24, 26, 30, 31, 33, 39, 45, 46, 47], "mutabl": 21, "mutat": 20, "my": 37, "my_capsul": 20, "my_catalog": 35, "my_catalog_nam": 35, "my_delta_t": 35, "my_provid": 20, "my_schema": 35, "my_schema_nam": 35, "my_tabl": [32, 35], "my_udaf": 31, "myaccumul": 31, "mycatalogprovid": 47, "myformatt": 37, "mysql": [17, 46], "mystyleprovid": 37, "mytablefunct": 31, "mytableprovid": [20, 45], "myusernam": 21, "n": [2, 5, 7, 41], "n_column": [2, 7], "n_file": [2, 7], "n_row_group": [2, 7], "name": [0, 1, 2, 3, 4, 5, 7, 11, 13, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 35, 38, 45, 46, 47], "name_pair": 5, "named_expr": 2, 
"named_param": 1, "named_struct": [5, 21], "nan": [4, 5, 7], "nanoarrow": [7, 18, 31], "nanosecond": 5, "nanvl": 5, "narrowli": 20, "nativ": [20, 35], "natur": [4, 5, 7], "nearest": [4, 5, 7], "nearli": [34, 35], "necessari": [20, 35, 47], "need": [0, 1, 2, 3, 4, 7, 11, 13, 18, 20, 21, 31, 33, 34, 35, 36, 37, 38, 46, 47], "neg": [4, 5], "negat": [4, 5, 7, 27], "nest": [1, 4, 34], "network": 34, "never": 2, "new": [1, 2, 3, 4, 5, 7, 17, 18, 20, 21, 26, 28, 31, 45, 46, 47], "new_bound": [20, 45], "new_nam": 2, "new_with_ffi_codec": 47, "newlin": [7, 13], "newlines_in_valu": [7, 13], "next": [5, 7, 15], "nice": 37, "node": [7, 14], "non": [2, 4, 5, 24, 33, 36], "none": [0, 1, 2, 3, 4, 5, 7, 11, 13, 15, 16, 18, 20, 31, 33, 35, 37, 45, 46, 47], "normal": 24, "not_red_unit": 26, "notat": [26, 43], "note": [1, 2, 4, 7, 23, 26, 35, 36], "notebook": [3, 36, 37, 38], "now": [5, 18, 20, 27, 29, 32, 47], "nr": 25, "nth": 37, "nth_valu": [5, 18, 24], "ntile": [5, 33], "null": [2, 4, 5, 7, 13, 25, 27, 29, 31, 39, 41], "null_count": 25, "null_first": 5, "null_regex": [7, 13], "null_treat": [4, 5, 7, 24, 33], "nullabl": [2, 7, 13, 18, 31], "nullif": [5, 27], "nulls_first": [4, 5, 7], "nulltreat": [4, 5, 7, 24, 33], "num": [2, 5, 34, 46], "num_centroid": 5, "num_el": 26, "num_row": [7, 18, 31], "number": [1, 2, 3, 4, 5, 7, 11, 13, 14, 18, 21, 26, 27, 31, 33, 34, 37, 46], "numer": [2, 3, 4, 5, 7, 34], "nvl": 5, "o": [34, 35, 36, 37], "obj": 47, "object": [1, 2, 3, 4, 7, 12, 13, 15, 16, 18, 20, 26, 31, 34, 36, 37, 39, 43, 46, 47], "object_stor": [7, 19, 35], "objectstor": 1, "obtain": [8, 22, 36], "obviou": 20, "occasion": 20, "occur": [7, 18, 36], "occurr": 5, "octet_length": [4, 5, 7], "oddish": 33, "offend": 21, "offer": [27, 36, 46], "offici": 20, "offset": [2, 4], "often": [33, 34], "ok": 47, "old": 2, "old_nam": 2, "older": 35, "olymp": 5, "omit": 29, "on_expr": 2, "onc": [3, 5, 8, 18, 31, 37, 39, 45], "one": [2, 4, 5, 7, 18, 20, 24, 29, 31, 33, 35, 36], "onli": [1, 2, 3, 4, 5, 
7, 11, 13, 15, 18, 20, 24, 27, 29, 31, 33, 36, 37], "onlin": [1, 2, 4, 5, 7, 18, 20, 34], "op": [2, 4, 7], "open": [3, 21], "oper": [1, 2, 4, 5, 7, 14, 15, 18, 20, 22, 24, 26, 30, 31, 33, 34, 35, 37], "operand": [4, 7], "operatefunctionarg": 4, "opt": [20, 21], "optim": [2, 18, 20, 34], "optimized_logical_plan": 2, "option": [0, 1, 2, 4, 5, 7, 11, 18, 19, 21, 23, 24, 27, 34, 35, 36, 37, 38, 41, 46], "options_intern": [1, 7], "order": [1, 2, 4, 5, 7, 8, 11, 13, 18, 20, 26], "order_bi": [4, 5, 7, 24, 33], "org": [1, 2, 5, 7, 14], "organ": 35, "origin": [2, 4, 5, 7, 31, 35, 46], "other": [1, 2, 3, 4, 5, 7, 11, 14, 18, 20, 21, 22, 23, 26, 28, 31, 34, 36, 37, 41], "other_df": 2, "other_id": 2, "otherwis": [4, 5, 7], "our": [20, 21, 24, 35, 38], "out": [2, 5, 7, 20, 24, 25, 26, 27, 29, 30, 33, 38, 39, 46], "output": [2, 3, 4, 7, 14, 18, 21, 24, 26, 31, 32, 37], "over": [2, 3, 4, 7, 15, 18, 20, 22, 24, 33, 35, 36, 37, 41], "overhead": 18, "overlai": 5, "overlap": 5, "overrid": [2, 7, 31], "overridden": 24, "overwrit": [2, 7], "own": [8, 31, 37, 47], "owner": 0, "owner_nam": 0, "pa": [1, 4, 7, 15, 18, 31, 35, 36, 39], "packag": [5, 20, 21], "pad": [5, 37], "page": [2, 7, 19, 20, 31], "pair": 5, "panda": [1, 2, 23, 25, 35, 36, 46], "pandas_df": [35, 36], "para": 33, "parallel": [1, 2, 7, 13, 34], "param_attack": 46, "param_nam": 3, "param_valu": [1, 46], "paramet": [0, 1, 2, 3, 4, 5, 7, 11, 13, 15, 16, 18, 21, 26, 28, 29, 37, 46, 47], "parameter": 1, "parasect": 33, "parquet": [0, 1, 2, 7, 11, 20, 22, 23, 30, 34, 35, 36, 37, 42], "parquet_prun": [1, 7, 11], "parquetcolumnopt": [2, 7], "parquetwriteropt": [2, 7], "pars": [1, 2, 7, 13, 16], "parse_sql_expr": 2, "parser": 46, "part": [2, 5, 7, 27], "particular": [5, 34], "partit": [1, 2, 4, 5, 7, 11, 13, 14, 18, 24, 34, 36], "partition_bi": [2, 4, 5, 7, 33], "partition_count": [7, 14], "pass": [1, 2, 7, 18, 20, 21, 25, 26, 31, 34, 35, 36, 46], "passenger_count": 30, "path": [1, 2, 7, 11, 16, 21, 35, 36], "path_to_t": 35, 
"pathlib": [1, 2, 7, 11, 16], "pattern": [2, 5, 7, 13, 20, 34], "pc": [7, 18], "pcre": 5, "pd": [35, 36], "peopl": 5, "per": [2, 3, 7, 13, 18, 33, 36, 37], "percent": 5, "percent_rank": [5, 18, 33], "percentag": 5, "percentil": 5, "perform": [1, 2, 3, 4, 5, 7, 13, 18, 20, 21, 23, 24, 27, 31, 33, 34, 35], "period": 20, "persist": 46, "person": 21, "physic": [2, 7, 14, 23], "pi": 5, "pick": 5, "pinsir": 33, "pip": [22, 38], "pixel": [3, 37], "pl": 35, "place": [5, 20, 21, 38, 46], "placehold": [4, 46], "plain": [2, 4, 7, 36, 37], "plain_dictionari": [2, 7], "plan": [1, 2, 4, 7, 16, 17, 18, 19, 22, 23, 36], "plan_intern": 16, "plan_to_sql": 17, "planner": [7, 14], "pleas": [21, 30], "plu": 5, "plugin": [8, 9, 10], "point": [3, 4, 5, 7, 21, 36, 46], "pointer_width": 21, "poison": [22, 24, 33, 35, 38], "pokemon": [22, 24, 27, 33, 35, 38, 46], "polar": [1, 2, 35, 36], "polars_df": [35, 36], "poll": 36, "pool": [1, 7], "popul": 5, "popular": [27, 35], "portion": 20, "posit": [2, 3, 5, 7, 24], "possibl": [2, 4, 7, 18, 20, 27, 31, 33], "possibli": 4, "post": 21, "postgr": 17, "postgresql": 17, "potenti": [2, 7, 14, 46, 47], "pow": [5, 27], "power": [5, 27, 31, 33], "pr": 21, "practic": [2, 20, 31, 34], "preced": [3, 4, 5, 7, 18, 33, 46], "precis": [5, 34], "predic": [1, 2, 7, 11], "prefer": [20, 21], "prefix": [5, 20], "prepar": [4, 46], "prepend": 5, "presenc": 27, "present": [2, 20, 29], "preserve_nul": 2, "pretti": 17, "prevent": [3, 21, 37], "previou": [5, 31, 33], "previous": 1, "primari": [2, 7, 9, 26, 36], "primit": [7, 18, 20, 31], "principl": 34, "print": [0, 1, 2, 4, 7, 14, 18, 32, 34, 36, 37], "printabl": [7, 14, 18], "prior": [2, 35], "probabl": [2, 7, 24], "problem": 29, "process": [2, 18, 24, 33, 34, 36, 39], "processor": 34, "produc": [2, 5, 7, 14, 16, 18, 20, 33, 36], "product": [5, 34, 36], "program": 20, "programmat": [7, 14], "project": [2, 4, 7, 14, 15, 20, 21, 36, 39, 47], "proper": 34, "properti": [0, 2, 3, 4, 7, 14], "proto_byt": 16, "protobuf": [7, 
14], "protocol": [1, 3, 18, 36], "provid": [0, 1, 2, 3, 4, 5, 7, 8, 9, 13, 15, 16, 17, 18, 20, 23, 24, 27, 31, 33, 34, 36, 38, 42, 46, 47], "prune": [1, 7, 11], "psychic": 24, "pub": [20, 47], "public": [0, 7], "pull": [21, 39], "pure": 21, "push": [21, 35], "pushdown_filt": 34, "put": 30, "py": [1, 7, 16, 20, 31, 34, 45, 47], "py_dict": 36, "py_list": 36, "pyani": 47, "pyarrow": [0, 1, 2, 4, 7, 11, 13, 15, 18, 20, 25, 31, 35, 39, 46], "pycapsul": [0, 1, 2, 7, 15, 18, 20, 31, 35, 39, 45, 47], "pycapsuleinterfac": [1, 2], "pyclass": [20, 21], "pyconfig": 20, "pydatatyp": 20, "pyiceberg": 35, "pymethod": [31, 45, 47], "pyo3": [21, 31, 35], "pyo3_build_config": 21, "pyo3_config_fil": 21, "pyo3_print_config": 21, "pypi": 38, "pyproject": 21, "pyresult": [31, 45, 47], "pysessioncontext": 20, "pyspark": 22, "pytabl": 20, "pytest": 21, "python": [0, 1, 2, 4, 7, 18, 25, 26, 29, 31, 34, 35, 38, 39, 45, 46, 47], "python3": 21, "python_typ": 20, "python_valu": [4, 7], "pythontyp": [4, 7, 20], "queri": [1, 2, 7, 14, 16, 18, 20, 22, 23, 27, 32, 34, 35, 36, 39], "quick": 25, "quit": 44, "quot": [2, 7, 13, 30], "r": [2, 4, 5, 7, 17, 20, 21, 41], "radian": [4, 5, 7], "rais": [2, 3, 4, 5, 7], "ram": 34, "random": [5, 18, 25, 26], "rang": [1, 2, 4, 5, 7, 18, 25, 26, 27, 31, 33, 34, 36], "rank": [5, 18, 31, 33], "ranks_in_partit": 18, "rare": 20, "rather": [5, 16, 36, 39], "ratio": 5, "raw": [1, 3, 4], "raw_sort": 4, "rawcatalog": [0, 7], "rawcataloglist": 0, "rawexpr": [4, 7], "rawschema": [0, 7], "rb": [7, 15], "re": [2, 20], "reach": 3, "read": [0, 1, 2, 7, 9, 10, 11, 13, 16, 20, 35, 36, 37, 38, 40, 41, 43, 44, 46], "read_avro": [1, 7, 11, 36, 40], "read_csv": [1, 7, 11, 22, 23, 24, 33, 35, 36, 38, 41], "read_json": [1, 7, 11, 36, 43], "read_parquet": [1, 7, 11, 23, 30, 34, 36, 44], "read_tabl": 1, "reader": [1, 7, 11, 13, 36, 39], "realiti": 24, "reason": [2, 20, 21], "rebuild": 21, "receiv": [5, 18, 20], "recent": [33, 35], "recommend": [2, 20, 21, 31, 34, 37, 46], "record": [1, 
2, 3, 7, 13, 14, 15, 23, 30, 31, 35, 39, 40], "record_batch": [1, 2, 7, 19], "record_batch_stream": [7, 15], "recordbatch": [1, 2, 3, 7, 15, 31, 35, 36], "recordbatchread": 36, "recordbatchstream": [1, 2, 7, 15, 36], "recursivequeri": 4, "red": [26, 37], "red_or_green_unit": 26, "red_unit": 26, "reduc": [7, 13, 21, 29, 31, 37, 47], "ref": [5, 31, 36], "refer": [2, 3, 5, 20, 23, 27, 36, 37, 46, 47], "referenc": [1, 23], "reflect": 3, "refresh": 3, "regardless": 18, "regener": 21, "regex": [5, 7, 13], "regexp_count": 5, "regexp_instr": 5, "regexp_lik": 5, "regexp_match": [5, 27], "regexp_replac": [5, 27], "region": 35, "regist": [0, 1, 2, 3, 7, 8, 11, 20, 23, 28, 31, 35, 36, 45, 46], "register_avro": 1, "register_catalog": [0, 35], "register_catalog_provid": 1, "register_catalog_provider_list": 1, "register_csv": [1, 27, 41, 46], "register_dataset": [1, 35], "register_formatt": 3, "register_json": 1, "register_listing_t": 1, "register_object_stor": [1, 35], "register_parquet": [1, 35, 44], "register_record_batch": 1, "register_schema": [0, 7, 35], "register_t": [0, 1, 2, 35, 45], "register_table_provid": [1, 35], "register_udaf": 1, "register_udf": 1, "register_udtf": [1, 31], "register_udwf": 1, "register_view": [1, 32], "registr": 46, "regr_avgi": [5, 24], "regr_avgx": [5, 24], "regr_count": [5, 24], "regr_intercept": [5, 24], "regr_r2": [5, 24], "regr_slop": [5, 24], "regr_sxi": 5, "regr_sxx": [5, 24], "regr_syi": [5, 24], "regress": [5, 24], "regular": [3, 5, 27], "reject": 4, "rel": [5, 33], "relat": [7, 14, 29], "releas": [20, 33], "relev": 21, "reli": [21, 35, 46], "remain": [4, 5, 7, 20, 27], "remot": [9, 10, 34], "remov": [0, 1, 2, 4, 5, 7, 24, 47], "renam": 2, "renamed_ag": 26, "render": 3, "reorder": [2, 7, 15], "repartit": [1, 2, 4, 7, 34], "repartition_by_hash": [2, 34], "repeat": [5, 26], "repeated_arrai": 26, "replac": [2, 5, 7, 27, 46], "repo": 21, "report": [2, 21], "repositori": [20, 31, 34, 35], "repr": 3, "repr_row": 3, "repres": [1, 2, 4, 5, 7, 
11, 13, 14, 15, 16, 23, 26, 31, 34, 36], "represent": [0, 1, 2, 3, 4, 5, 7, 11, 14, 16, 18, 46], "request": [3, 20, 21], "requested_schema": [1, 2, 7, 15], "requir": [2, 4, 7, 18, 20, 27, 34, 35, 41, 47], "reserv": [1, 7], "reset": [3, 37], "reset_formatt": [3, 37], "resolv": [3, 21, 33], "resourc": [20, 34], "respect": [5, 24, 31, 35], "respect_nul": [5, 24, 33], "rest": [7, 18], "result": [1, 2, 5, 7, 14, 15, 18, 22, 23, 24, 26, 29, 30, 31, 32, 33, 34, 36], "result_batch": 36, "result_dict": 32, "retriev": [0, 1, 4, 7, 29], "return": [0, 1, 2, 3, 8, 9, 10, 11, 13, 14, 15, 16, 18, 20, 26, 29, 31, 35, 36, 37, 39], "return_field": [7, 18], "return_typ": [7, 18, 31], "reus": 20, "reusabl": 4, "revers": [4, 5, 7], "review": [20, 21], "rex": [4, 7], "rex_call_oper": [4, 7], "rex_call_operand": [4, 7], "rex_typ": [4, 7], "rextyp": [4, 7], "rfc3339": [4, 5, 7], "rh": [4, 7], "rhyhorn": 33, "rich": [3, 37], "richer": 37, "right": [2, 5, 29], "right_on": [2, 29], "ritchi": 33, "rle": [2, 7], "rle_dictionari": [2, 7], "robin": 2, "rock": 24, "roll": 33, "root": [4, 5, 7, 14], "roption": 20, "round": [2, 5, 23], "rout": 20, "row": [1, 2, 3, 4, 5, 7, 11, 13, 18, 23, 24, 25, 26, 29, 31, 32, 33, 34, 36, 37, 41, 46], "row_count": 3, "row_idx": 3, "row_numb": [5, 18, 33], "rpad": 5, "rresult": 20, "rstring": 20, "rtrim": [4, 5, 7], "rubi": 25, "run": [1, 2, 7, 11, 13, 22, 32, 34, 38], "runnabl": 21, "runtim": [1, 7, 18, 34], "runtimeconfig": 1, "runtimeenvbuild": [1, 7, 34], "rust": [2, 5, 7, 13, 18, 20, 22, 31, 34, 35, 45], "rustc": 20, "rustflag": 21, "rustonomicon": 20, "rvec": 20, "rwlock": 20, "s3": 35, "safe": 20, "safeti": 22, "same": [2, 5, 7, 18, 20, 23, 31, 36], "sampl": [5, 25, 32, 35], "saur": 27, "save": 18, "scalar": [1, 4, 7, 18, 26, 28, 46], "scalarsubqueri": 4, "scalarudf": [1, 7, 18], "scalarudfexport": [7, 18], "scalarvalu": 18, "scalarvari": 4, "scan": [1, 7, 13, 14], "schema": [0, 1, 2, 3, 4, 7, 11, 13, 14, 15, 25, 37], "schema_infer_max_record": [1, 7, 11, 
13], "schema_nam": [0, 4, 7], "schemaprovid": [0, 7, 46, 47], "schemaproviderexport": [0, 7], "scheme": 2, "scienc": 27, "score": 5, "script": [3, 34], "search": 5, "second": [5, 23, 24, 26, 35], "second_arrai": 5, "second_two_el": 26, "secret_access_kei": 35, "section": [20, 23, 25, 28, 29, 33, 35, 36], "see": [1, 2, 4, 5, 7, 15, 16, 17, 18, 20, 21, 23, 24, 26, 31, 33, 34, 36, 37], "seen": [18, 24], "select": [1, 2, 4, 5, 7, 11, 13, 15, 18, 23, 26, 27, 28, 29, 31, 32, 33, 35, 36, 39, 46], "select_column": 2, "select_expr": 2, "self": [7, 13, 15, 18, 31, 37, 45, 47], "semi": [2, 28], "send": 47, "sensit": [2, 7], "separ": [5, 36], "sequenc": [1, 2, 7, 18], "serd": 16, "seri": 26, "serial": [2, 7, 16, 40], "serialize_byt": 16, "serialize_to_plan": 16, "serv": 20, "session": [1, 2, 3, 7, 31, 34, 37, 38, 46, 47], "session_id": 1, "sessionconfig": [1, 7, 34], "sessioncontext": [0, 1, 2, 5, 7, 8, 14, 16, 18, 20, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 43, 44, 45, 46], "sessioncontextintern": 1, "set": [0, 1, 2, 3, 4, 5, 7, 11, 13, 17, 18, 20, 21, 23, 28, 29, 31, 34, 37, 41, 46], "set_custom_cell_build": 3, "set_custom_header_build": 3, "set_formatt": 3, "setter": 20, "setvari": 4, "sever": [30, 34, 36], "sha": [4, 5, 7], "sha224": [4, 5, 7], "sha256": [4, 5, 7], "sha384": [4, 5, 7], "sha512": [4, 5, 7], "sharabl": 20, "share": [3, 20, 21, 23, 26, 39], "shift_offet": 5, "shift_offset": 5, "short": 36, "shot": 18, "should": [1, 2, 4, 5, 7, 8, 11, 14, 16, 20, 21, 24, 29, 31, 33], "show": [2, 3, 20, 22, 23, 25, 26, 29, 31, 33, 34, 35, 36, 37, 38, 45, 46], "show_attack": 46, "show_column": 46, "show_truncation_messag": [3, 37], "showcas": 34, "shown": 3, "side": [4, 5, 7, 20], "sign": [4, 5, 7], "signatur": 47, "signific": 35, "significantli": [5, 34], "signum": [4, 5, 7], "silver": 5, "similar": [4, 5, 7, 20, 23, 26, 33, 46], "similarto": 4, "simpl": [2, 5, 21, 30, 34, 43, 44, 46], "simplest": [18, 31], "simpli": [2, 7, 18, 20, 21, 35, 37, 
46], "simplic": [7, 18], "simplifi": 33, "simultan": 34, "sin": [4, 5, 7], "sinc": [2, 3, 24, 31, 35, 46], "sine": [4, 5, 7], "singl": [1, 2, 3, 4, 5, 7, 13, 14, 18, 24, 26, 31, 33, 34, 35, 36, 46], "single_file_output": [2, 7], "sinh": [4, 5, 7], "site": 41, "size": [1, 2, 5, 7, 26, 34, 37, 47], "skew": 34, "skip": [1, 2, 7, 11, 18, 41], "skip_arrow_metadata": [2, 7], "skip_metadata": [1, 7, 11], "slice": [4, 5, 7, 26], "slightli": 31, "slope": 5, "slow": 2, "slower": [2, 34], "slowest": 31, "slowpok": 33, "small": [34, 35], "smallest": [18, 24], "smooth_a": 31, "snappi": [2, 7], "snorlax": 33, "so": [1, 2, 4, 7, 18, 20, 21, 30, 31, 33, 36, 39, 45], "softwar": [7, 14, 20], "solid": 37, "some": [2, 5, 7, 18, 20, 21, 24, 27, 31, 33, 35, 36, 38, 45, 47], "sometim": [1, 7, 20, 35], "soon": 5, "sort": [1, 2, 4, 5, 7, 11, 13, 24, 33, 36], "sort_bi": [2, 7], "sort_express": 5, "sort_list_to_raw_sort_list": 1, "sortexpr": [1, 2, 4, 5, 7, 13], "sortkei": [1, 2, 4, 5], "sound": 20, "sourc": [1, 5, 7, 8, 9, 10, 11, 20, 21, 23, 34, 36, 37, 38, 39, 45], "sp": [22, 35, 38], "space": [4, 5, 7], "sparingli": 31, "special": [3, 18, 46], "specif": [0, 2, 3, 4, 5, 7, 14, 25, 26, 27, 33, 34, 36, 37, 46, 47], "specifi": [1, 2, 4, 5, 7, 13, 26, 27, 29, 31, 33, 34, 46], "speed": [2, 7, 22, 24, 33, 35, 38], "sphinx": 19, "spill": [1, 7], "spillabl": [1, 7], "split": [5, 31], "split_part": 5, "sql": [1, 2, 7, 14, 16, 17, 18, 22, 23, 26, 30, 32, 35, 36], "sql_type": 20, "sql_with_opt": 1, "sqlite": 17, "sqloption": [1, 7], "sqltabl": [8, 9, 10], "sqltype": [4, 7, 20], "sqrt": [4, 5, 7], "squar": [4, 5, 7], "squi": 27, "squirtl": [22, 27, 33, 35, 38], "src": [4, 7, 20, 21], "ssd": 34, "stabl": [18, 20, 31], "stai": 20, "standard": [5, 7, 18, 20], "start": [4, 5, 7, 13, 18, 26, 29, 30, 33, 37, 38, 41], "start_ag": 26, "start_bound": [4, 7], "started_young": 26, "starts_with": 5, "state": [1, 7, 18, 20, 23, 31], "state_typ": [7, 18, 31], "statement": [1, 4, 5, 7, 23, 46], "static": [0, 1, 2, 
4, 7, 14, 16, 17, 18], "statist": [2, 7, 24, 25], "statistics_en": [2, 7], "statistics_truncate_length": [2, 7], "std": 25, "stddev": [5, 24], "stddev_pop": [5, 24], "stddev_samp": 5, "steel": 24, "stem": 20, "step": [5, 20], "still": [2, 7, 13, 20, 31, 35], "stop": 5, "storag": [34, 35], "store": [1, 4, 7, 12, 20], "str": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 16, 17, 18, 46], "str_lit": 27, "straightforward": [7, 15, 40, 41], "strategi": 34, "stream": [1, 2, 7, 15, 20, 39], "strftime": 5, "stride": 5, "string": [0, 1, 2, 3, 4, 5, 7, 13, 14, 16, 17, 18, 20, 21, 24, 25, 26, 28, 31, 36, 46], "string1": 5, "string2": 5, "string_agg": [5, 24], "string_list": 5, "string_liter": [4, 7, 27], "string_view": [4, 7, 39], "strip": 2, "strong": 22, "strongli": 34, "strpo": 5, "struct": [1, 4, 5, 7, 20, 28, 39], "structarrai": 39, "structur": [7, 13, 14, 20], "style": [3, 20, 26, 36, 46], "style_provid": [3, 37], "styleprovid": [3, 37], "sub": [4, 7], "sub_expr": 5, "subclass": 20, "subfield": [4, 5, 7], "submit": 21, "submodul": 21, "subqueri": 4, "subqueryalia": 4, "subset": [2, 27, 30], "substitut": 1, "substr": 5, "substr_index": 5, "substrait": [7, 19], "subtl": 20, "subtract": [4, 7], "successfulli": [7, 13], "suffici": [1, 7, 34], "suffix": 5, "suggest": 35, "sum": [5, 7, 18, 24, 31, 34, 36], "sum_bias_10": [7, 18], "summar": [2, 7, 18], "summari": [2, 24, 25], "support": [0, 1, 2, 3, 4, 5, 7, 13, 14, 15, 16, 17, 18, 20, 29, 31, 35, 36, 37, 46], "supports_bounded_execut": [18, 31], "suppos": [5, 20, 24], "suppress_build_script_link_lin": 21, "sure": 21, "switch": 46, "symbol": 2, "sync": 21, "synchron": 20, "syntax": [5, 26], "synthet": 34, "system": [1, 2, 7, 20, 21, 34], "t": [5, 18, 30], "t1": 18, "tabl": [0, 1, 2, 3, 7, 8, 9, 10, 11, 13, 14, 18, 20, 23, 25, 27, 28, 29, 34, 36, 37, 38, 39, 41, 42, 44, 46], "table_exist": [0, 1], "table_id": 3, "table_nam": [0, 2, 8, 9, 10], "table_partition_col": [1, 7, 11, 13], "table_provid": 1, "table_uuid": [2, 3, 37], 
"tablefunct": [1, 7, 18, 47], "tableprovid": [20, 45, 47], "tableproviderexport": [0, 1, 7], "tablescan": 4, "tabular": 36, "tag": 3, "tail": 2, "take": [2, 3, 18, 20, 21, 24, 26, 31, 33, 35, 47], "taken": 34, "tan": [4, 5, 7], "tangent": [4, 5, 7], "tanh": [4, 5, 7], "target": [1, 2, 7, 34], "target_partit": [1, 7], "task": [27, 36], "taskcontextprovid": 47, "taxi": 23, "td": 37, "technic": 22, "techniqu": [34, 35], "templat": [7, 18], "tempor": 28, "temporari": [1, 2, 7, 37, 46], "temporarili": 37, "temporary_column": 36, "term": 33, "termin": [7, 13], "terminologi": 20, "test": [5, 7, 14, 21, 31, 34, 35], "text": [1, 2, 5, 37], "textual": 27, "th": [5, 37], "than": [2, 4, 5, 7, 13, 24, 31, 34, 36, 37, 39, 41], "thei": [7, 8, 18, 20, 26, 31, 46], "them": [2, 5, 7, 20, 21, 23, 25, 31, 35], "theme": 37, "then_expr": 4, "therefor": 2, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 38, 39, 45, 46, 47], "third": 23, "thoroughli": 20, "those": [2, 4, 7, 22, 24, 31, 35, 46], "though": [20, 31], "thread": 22, "three": [31, 33, 35], "threshold": 46, "through": [2, 3, 20, 22, 28, 35, 36, 37, 38], "thu": 18, "thusli": 20, "ticket": 21, "tie": 5, "tight": 20, "tile": 5, "time": [5, 18, 20, 21, 24, 26, 27, 31, 34, 36, 47], "time64": 5, "timestamp": [4, 5, 7, 27, 31], "tip_amount": [23, 30], "tip_perc": 23, "tips_plus_tol": 30, "tlc": [23, 30], "tmp": [1, 7], "to_arrow_t": [2, 36], "to_hex": [4, 5, 7], "to_inn": [7, 13], "to_json": 16, "to_panda": [2, 25, 27, 36, 46], "to_polar": [2, 36], "to_proto": [7, 14], "to_pyarrow": [7, 15, 36], "to_pyarrow_dataset": 35, "to_pydict": [2, 32, 36], "to_pylist": [2, 36], "to_substrait_plan": 16, "to_timestamp": [5, 27], "to_timestamp_micro": 5, "to_timestamp_milli": 5, "to_timestamp_nano": 5, "to_timestamp_second": 5, "to_unixtim": 5, "to_val": 5, "to_vari": [4, 7, 14], "todo": 20, "togeth": [2, 5, 24], "toler": [2, 7], "tolls_amount": 30, "toml": 21, 
"top": 25, "topic": 35, "total": [2, 4, 5, 7, 22, 23, 26, 27, 35, 38], "total_amount": [23, 36], "total_as_float": 27, "total_as_int": 27, "touch": 21, "toward": 5, "tr": 37, "track": 20, "tracker": 5, "tradit": 21, "trail": 2, "trait": [18, 20], "transact": [1, 7], "transactionaccessmod": 4, "transactionconclus": 4, "transactionend": 4, "transactionisolationlevel": 4, "transactionstart": 4, "transfer": 20, "transform": [2, 7, 14, 23, 36], "translat": [5, 20], "treat": [4, 33, 41], "treatment": [4, 7], "tree": [7, 14, 26], "trigger": [2, 37, 39], "trim": [4, 5, 7], "trip": [23, 30, 35], "trip_dist": [23, 30], "trivial": 21, "true": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 13, 18, 21, 24, 26, 27, 30, 31, 33, 34, 37, 41], "trunc": 5, "truncat": [2, 3, 5, 7, 13, 24, 27, 30, 33, 37], "truncated_row": [7, 13], "try": [1, 7, 11, 13, 20], "trycast": 4, "tune": [2, 7, 34], "tupl": [1, 2, 5, 7, 11, 13, 15, 18], "turn": [0, 2, 4, 7, 20, 46], "tutori": 38, "two": [2, 4, 5, 7, 14, 15, 20, 24, 26, 29, 31, 33, 34, 35], "type": [0, 1, 2, 3, 4, 5, 7, 11, 13, 18, 20, 21, 22, 24, 26, 27, 30, 31, 33, 34, 35, 38, 46], "type_class": 3, "typeerror": [3, 4], "typeguard": 18, "typic": [0, 2, 4, 7, 15, 17, 18, 23, 35], "typing_extens": [7, 15], "u": [20, 24, 26, 35], "udaf": [1, 7, 18, 22, 31], "udaf1": [7, 18], "udaf2": [7, 18], "udaf3": [7, 18], "udf": [1, 7, 18, 22, 31], "udf4": [7, 18], "udtf": [7, 18], "udwf": [1, 7, 18], "udwf1": [7, 18], "udwf2": [7, 18], "udwf3": [7, 18], "ultim": 21, "unabl": 46, "unari": [7, 14], "unbound": [1, 4, 7, 18, 33], "unchang": [2, 27], "uncompress": [2, 7, 13], "undefin": 21, "under": [1, 20, 35], "underli": [4, 7, 13, 18, 20, 39], "understand": [20, 34, 36], "unfortun": 20, "unfrozen": 20, "unicod": [4, 5, 7], "unintend": 46, "union": [2, 4, 5], "union_distinct": 2, "uniqu": [1, 2, 3, 5, 7, 18, 24], "unit": [4, 7, 33, 35], "unixtim": 5, "unknown": 4, "unless": 20, "unlik": [4, 7, 33], "unmatch": 29, "unnest": [2, 4], "unnest_column": 2, "unnestexpr": 4, 
"unoptim": 2, "unpars": [7, 19], "unsaf": [20, 47], "unspil": [1, 7], "until": 23, "up": [2, 7, 14, 18, 23, 26, 33, 34, 37], "updat": [1, 2, 7, 18, 20, 31, 47], "upon": [5, 20, 31], "upper": [4, 5, 7], "uppercas": [4, 5, 7], "upstream": 20, "urbango": 29, "url": 1, "us": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14, 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 43, 44, 45, 46, 47], "usabl": 2, "usag": [1, 2, 4, 5, 7, 18, 24, 33], "use_shared_styl": [3, 7, 37], "user": [0, 1, 2, 4, 5, 7, 8, 14, 15, 16, 17, 18, 20, 21, 23, 26, 27, 28, 37, 38, 47], "user_defin": [1, 7, 19, 31], "user_id": 36, "uses_window_fram": [18, 31], "utc": 5, "utf8": [4, 5, 7], "utf8view": [4, 7], "util": [3, 20, 33, 34, 35], "uuid": 5, "uv": 21, "v": [21, 34], "v4": 5, "val": 46, "valid": [1, 2, 3, 4, 7, 8, 9, 10, 18], "validate_pycapsul": 47, "valu": [1, 2, 3, 4, 5, 7, 13, 14, 18, 24, 26, 28, 29, 30, 31, 33, 36, 37, 46], "value1": 5, "value2": 5, "value_i": 5, "value_x": 5, "valueerror": [2, 3, 7], "values_a": 31, "values_b": 31, "values_view": 2, "var": 5, "var_pop": [5, 24], "var_samp": [5, 24], "var_sampl": 5, "vari": 34, "variabl": [1, 5, 7, 21, 26, 46], "varianc": 5, "variant": [4, 7, 14, 29], "variant_nam": [4, 7], "varieti": [26, 31, 35, 41], "variou": [13, 36, 37, 38], "vastli": [21, 34], "vec": 20, "vendorid": 30, "venomoth": 33, "venonat": 33, "venu": 27, "venufleur": 27, "venufleurmega": 27, "venusaur": [22, 27, 33, 35, 38, 46], "venusaurmega": [22, 27, 33, 35, 38, 46], "venv": 21, "verbos": 2, "veri": [40, 41], "verifi": 38, "version": [2, 3, 7, 20, 21, 29, 35, 47], "versu": 34, "via": [1, 2, 4, 7, 15, 16, 18, 20, 21, 22, 26, 29, 31, 35, 36, 38, 45, 46, 47], "view": [0, 1, 2, 7, 25, 27, 28, 38, 46], "view1": 32, "vink": 33, "violat": 3, "virtual": [1, 7, 21], "visual": [7, 14, 23, 37], "volatil": [7, 18, 31], "voltorb": [24, 33], "volum": 34, "vulpix": 24, "wa": [1, 7], "wai": [20, 21, 24, 35, 36, 38], "wait": 20, "want": [20, 21, 24, 27, 
31, 33, 37, 45], "wartortl": [22, 35, 38], "water": [22, 24, 27, 35, 38], "we": [0, 2, 7, 18, 20, 21, 23, 24, 26, 27, 29, 30, 31, 33, 34, 35, 38, 39, 46], "weedl": [22, 33, 35, 38], "weight": [5, 26], "welcom": [21, 38], "well": [1, 7, 20, 21, 31, 36, 46], "were": [20, 26], "what": [20, 23], "whatev": 18, "when": [1, 2, 3, 4, 5, 7, 11, 13, 18, 20, 21, 22, 24, 26, 29, 31, 33, 34, 35, 36, 37, 43, 46], "when_expr": 4, "whenev": [20, 21, 36], "where": [2, 4, 5, 7, 14, 24, 26, 27, 31, 32, 33, 35, 46], "wherea": 26, "wherev": 20, "whether": [1, 2, 3, 4, 5, 7, 11, 13, 18, 26], "which": [0, 2, 4, 5, 7, 13, 18, 20, 21, 22, 24, 26, 30, 31, 33, 35, 36, 37, 47], "while": [7, 18, 20, 29, 34], "white": 37, "who": [20, 47], "whole": 24, "why": 20, "wide": [26, 34, 35, 36], "width": [3, 37], "window": [1, 4, 5, 7, 18, 24, 28, 34], "window_fram": [4, 5, 7, 33], "windowevalu": [7, 18, 31], "windowexpr": 4, "windowfram": [4, 5, 7, 33], "windowframebound": [4, 7], "windowudf": [1, 7, 18], "windowudfexport": [7, 18], "wish": [20, 29, 31], "with_": [7, 13], "with_allow_ddl": [1, 7], "with_allow_dml": [1, 7], "with_allow_stat": [1, 7], "with_batch_s": [1, 7], "with_column": [2, 36], "with_column_renam": 2, "with_com": [7, 13, 41], "with_create_default_catalog_and_schema": [1, 7, 34], "with_default_catalog_and_schema": [1, 7, 34], "with_delimit": [7, 13, 41], "with_disk_manager_dis": [1, 7], "with_disk_manager_o": [1, 7, 34], "with_disk_manager_specifi": [1, 7], "with_escap": [7, 13, 41], "with_fair_spill_pool": [1, 7, 34], "with_file_compression_typ": [7, 13, 41], "with_file_extens": [7, 13, 41], "with_file_sort_ord": [7, 13], "with_greedy_memory_pool": [1, 7], "with_has_head": [7, 13, 41], "with_head": 2, "with_information_schema": [1, 7, 34], "with_logical_extension_codec": 1, "with_newlines_in_valu": [7, 13], "with_null_regex": [7, 13, 41], "with_parquet_prun": [1, 7, 34], "with_pretti": 17, "with_quot": [7, 13], "with_repartition_aggreg": [1, 7, 34], "with_repartition_file_min_s": 
[1, 7], "with_repartition_file_scan": [1, 7], "with_repartition_join": [1, 7, 34], "with_repartition_sort": [1, 7], "with_repartition_window": [1, 7, 34], "with_schema": [7, 13], "with_schema_infer_max_record": [7, 13], "with_table_partition_col": [7, 13], "with_target_partit": [1, 7, 34], "with_temp_file_path": [1, 7], "with_termin": [7, 13], "with_truncated_row": [7, 13, 41], "with_unbounded_memory_pool": [1, 7], "within": [0, 5, 7, 9, 18, 24, 26], "within_limit": 2, "without": [2, 4, 5, 7, 20, 21, 24, 27, 29, 30, 31, 36, 46], "won": 30, "word": [4, 5, 7], "work": [1, 2, 5, 7, 23, 27, 30, 34, 36, 37, 38, 46], "workflow": 21, "workload": 34, "worthwhil": [2, 7], "would": [4, 5, 8, 18, 20, 46], "wrap": [1, 2, 20, 31], "wrapper": [1, 7, 15, 20, 21, 35], "write": [2, 7, 8, 16, 20, 31, 35, 36, 47], "write_batch_s": [2, 7], "write_csv": 2, "write_json": 2, "write_opt": 2, "write_parquet": 2, "write_parquet_with_opt": 2, "write_t": 2, "writer": [2, 7], "writer_vers": [2, 7], "written": [2, 7, 20, 22, 35, 36], "x": [2, 5, 7, 18, 22, 27, 33, 35, 38, 46], "xor": 5, "xz": [7, 13], "y": [2, 5, 18, 22, 27, 33, 35, 38, 46], "year": 5, "years_in_posit": 26, "yellow": [23, 35], "yellow_tripdata_2021": [23, 30], "yet": [2, 35], "yield": [2, 5, 36, 39], "you": [0, 1, 2, 5, 7, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 45, 46, 47], "your": [0, 2, 7, 18, 20, 21, 24, 25, 30, 31, 34, 35, 36, 37, 46, 47], "your_tabl": 36, "yourself": 20, "z": [1, 7, 18], "zero": [2, 5, 7, 15, 22, 39], "zstandard": [2, 7], "zstd": [2, 7, 13], "zubat": 24, "\u03c0": 5}, "titles": ["datafusion.catalog", "datafusion.context", "datafusion.dataframe", "datafusion.dataframe_formatter", "datafusion.expr", "datafusion.functions", "datafusion.html_formatter", "datafusion", "datafusion.input.base", "datafusion.input", "datafusion.input.location", "datafusion.io", "datafusion.object_store", "datafusion.options", "datafusion.plan", "datafusion.record_batch", 
"datafusion.substrait", "datafusion.unparser", "datafusion.user_defined", "API Reference", "Python Extensions", "Introduction", "DataFusion in Python", "Concepts", "Aggregation", "Basic Operations", "Expressions", "Functions", "Common Operations", "Joins", "Column Selections", "User-Defined Functions", "Registering Views", "Window Functions", "Configuration", "Data Sources", "DataFrames", "HTML Rendering in Jupyter", "Introduction", "Arrow", "Avro", "CSV", "IO", "JSON", "Parquet", "Custom Table Provider", "SQL", "Upgrade Guides"], "titleterms": {"0": 47, "52": 47, "The": 20, "addit": 37, "aggreg": [24, 31, 33], "altern": 20, "anti": 29, "apach": 35, "api": 19, "approach": 20, "argument": 36, "arrai": 26, "arrow": [20, 36, 39], "attribut": [4, 7, 12, 18], "avail": 33, "avro": 40, "base": [8, 36], "basic": [25, 37], "benchmark": 34, "best": 37, "boolean": 26, "build": 21, "built": 36, "cast": 27, "catalog": [0, 35], "class": [0, 1, 2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 20, 36], "code": 21, "column": [26, 30, 36], "commit": 21, "common": [28, 36], "concept": 23, "condit": 27, "configur": [34, 37], "consider": 34, "content": [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], "context": [1, 23], "contextu": 37, "control": 37, "copi": 36, "core": 36, "cpu": 34, "creat": [35, 36, 37], "csv": 41, "custom": [35, 37, 45], "data": 35, "datafram": [2, 23, 35, 36], "dataframe_formatt": 3, "datafus": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 22, 39, 47], "defin": [31, 35], "delta": 35, "depend": 21, "detail": 20, "develop": 21, "displai": 37, "distinct": 24, "duplic": 29, "exampl": [22, 34], "execut": 36, "export": 39, "expr": 4, "express": [23, 26, 36], "extens": 20, "faq": 31, "ffi": 20, "file": 35, "fill_nul": 27, "filter": 24, "format": 37, "formatt": 37, "frame": 33, "from": [20, 39], "full": 29, "function": [3, 4, 5, 7, 11, 18, 24, 26, 27, 31, 33, 36], "guid": 47, "guidelin": [20, 21], "handl": 27, "hook": 21, "how": 21, 
"html": [36, 37], "html_formatt": 6, "iceberg": 35, "implement": 20, "import": [34, 39], "improv": 21, "inner": 29, "input": [8, 9, 10], "inspir": 20, "instal": [21, 22, 38], "introduct": [21, 38], "io": [11, 42], "issu": 20, "join": 29, "json": 43, "jupyt": 37, "kei": 29, "lake": 35, "left": 29, "librari": [35, 36], "liter": 26, "local": 35, "locat": 10, "manag": 37, "mathemat": 27, "maxim": 34, "memori": [35, 37], "miss": 27, "modul": [0, 1, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18], "mutabl": 20, "name": 36, "null": [24, 33], "object": 35, "object_stor": 12, "oper": [25, 28, 36], "optim": 37, "option": [13, 31], "order": [24, 33], "other": [27, 35], "overview": 36, "packag": [7, 9], "paramet": [24, 33], "parameter": 46, "parquet": 44, "partit": 33, "perform": 37, "plan": 14, "practic": 37, "pre": 21, "primari": 20, "provid": [35, 37, 45], "pyarrow": 36, "pyo3": 20, "python": [20, 21, 22, 36], "queri": 46, "record_batch": 15, "refer": 19, "regist": 32, "render": [36, 37], "resourc": 37, "return": [4, 5, 7], "run": 21, "rust": 21, "scalar": 31, "schema": 35, "select": 30, "semi": 29, "separ": 21, "session": 23, "set": [24, 33], "share": 37, "sourc": 35, "speed": 21, "sql": 46, "statu": 20, "store": 35, "stream": 36, "string": 27, "struct": 26, "style": 37, "submodul": [7, 9], "substrait": 16, "tabl": [31, 35, 45], "tempor": 27, "termin": 36, "treatment": [24, 33], "udwf": 31, "unpars": 17, "updat": 21, "upgrad": 47, "usag": 34, "user": [31, 35], "user_defin": 18, "valu": 27, "view": 32, "window": [31, 33], "work": 20, "zero": 36}}) \ No newline at end of file diff --git a/src/array.rs b/src/array.rs deleted file mode 100644 index 1ff08dfb2..000000000 --- a/src/array.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::ptr::NonNull; -use std::sync::Arc; - -use arrow::array::{Array, ArrayRef}; -use arrow::datatypes::{Field, FieldRef}; -use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; -use arrow::pyarrow::ToPyArrow; -use pyo3::ffi::c_str; -use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods}; -use pyo3::types::PyCapsule; -use pyo3::{Bound, PyAny, PyResult, Python, pyclass, pymethods}; - -use crate::errors::PyDataFusionResult; -use crate::utils::validate_pycapsule; - -/// A Python object which implements the Arrow PyCapsule for importing -/// into other libraries. -#[pyclass( - from_py_object, - name = "ArrowArrayExportable", - module = "datafusion", - frozen -)] -#[derive(Clone)] -pub struct PyArrowArrayExportable { - array: ArrayRef, - field: FieldRef, -} - -#[pymethods] -impl PyArrowArrayExportable { - #[pyo3(signature = (requested_schema=None))] - fn __arrow_c_array__<'py>( - &'py self, - py: Python<'py>, - requested_schema: Option>, - ) -> PyDataFusionResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> { - let field = if let Some(schema_capsule) = requested_schema { - validate_pycapsule(&schema_capsule, "arrow_schema")?; - - let data: NonNull = schema_capsule - .pointer_checked(Some(c_str!("arrow_schema")))? 
- .cast(); - let schema_ptr = unsafe { data.as_ref() }; - let desired_field = Field::try_from(schema_ptr)?; - - Arc::new(desired_field) - } else { - Arc::clone(&self.field) - }; - - let ffi_schema = FFI_ArrowSchema::try_from(&field)?; - let schema_capsule = PyCapsule::new(py, ffi_schema, Some(cr"arrow_schema".into()))?; - - let ffi_array = FFI_ArrowArray::new(&self.array.to_data()); - let array_capsule = PyCapsule::new(py, ffi_array, Some(cr"arrow_array".into()))?; - - Ok((schema_capsule, array_capsule)) - } -} - -impl ToPyArrow for PyArrowArrayExportable { - fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { - let module = py.import("pyarrow")?; - let method = module.getattr("array")?; - let array = method.call((self.clone(),), None)?; - Ok(array) - } -} - -impl PyArrowArrayExportable { - pub fn new(array: ArrayRef, field: FieldRef) -> Self { - Self { array, field } - } -} diff --git a/src/catalog.rs b/src/catalog.rs deleted file mode 100644 index 43325c30d..000000000 --- a/src/catalog.rs +++ /dev/null @@ -1,719 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::collections::HashSet; -use std::ptr::NonNull; -use std::sync::Arc; - -use async_trait::async_trait; -use datafusion::catalog::{ - CatalogProvider, CatalogProviderList, MemoryCatalogProvider, MemoryCatalogProviderList, - MemorySchemaProvider, SchemaProvider, -}; -use datafusion::common::DataFusionError; -use datafusion::datasource::TableProvider; -use datafusion_ffi::catalog_provider::FFI_CatalogProvider; -use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; -use datafusion_ffi::schema_provider::FFI_SchemaProvider; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::PyKeyError; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::PyCapsule; - -use crate::dataset::Dataset; -use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err}; -use crate::table::PyTable; -use crate::utils::{ - create_logical_extension_capsule, extract_logical_extension_codec, validate_pycapsule, - wait_for_future, -}; - -#[pyclass( - from_py_object, - frozen, - name = "RawCatalogList", - module = "datafusion.catalog", - subclass -)] -#[derive(Clone)] -pub struct PyCatalogList { - pub catalog_list: Arc, - codec: Arc, -} - -#[pyclass( - from_py_object, - frozen, - name = "RawCatalog", - module = "datafusion.catalog", - subclass -)] -#[derive(Clone)] -pub struct PyCatalog { - pub catalog: Arc, - codec: Arc, -} - -#[pyclass( - from_py_object, - frozen, - name = "RawSchema", - module = "datafusion.catalog", - subclass -)] -#[derive(Clone)] -pub struct PySchema { - pub schema: Arc, - codec: Arc, -} - -impl PyCatalog { - pub(crate) fn new_from_parts( - catalog: Arc, - codec: Arc, - ) -> Self { - Self { catalog, codec } - } -} - -impl PySchema { - pub(crate) fn new_from_parts( - schema: Arc, - codec: Arc, - ) -> Self { - Self { schema, codec } - } -} - -#[pymethods] -impl PyCatalogList { - #[new] - pub fn new( - py: Python, - catalog_list: Py, - session: Option>, - ) -> PyResult { - let codec = 
extract_logical_extension_codec(py, session)?; - let catalog_list = Arc::new(RustWrappedPyCatalogProviderList::new( - catalog_list, - codec.clone(), - )) as Arc; - Ok(Self { - catalog_list, - codec, - }) - } - - #[staticmethod] - pub fn memory_catalog_list(py: Python, session: Option>) -> PyResult { - let codec = extract_logical_extension_codec(py, session)?; - let catalog_list = - Arc::new(MemoryCatalogProviderList::default()) as Arc; - Ok(Self { - catalog_list, - codec, - }) - } - - pub fn catalog_names(&self) -> HashSet { - self.catalog_list.catalog_names().into_iter().collect() - } - - #[pyo3(signature = (name="public"))] - pub fn catalog(&self, name: &str) -> PyResult> { - let catalog = self - .catalog_list - .catalog(name) - .ok_or(PyKeyError::new_err(format!( - "Schema with name {name} doesn't exist." - )))?; - - Python::attach(|py| { - match catalog - .as_any() - .downcast_ref::() - { - Some(wrapped_catalog) => Ok(wrapped_catalog.catalog_provider.clone_ref(py)), - None => PyCatalog::new_from_parts(catalog, self.codec.clone()).into_py_any(py), - } - }) - } - - pub fn register_catalog(&self, name: &str, catalog_provider: Bound<'_, PyAny>) -> PyResult<()> { - let provider = extract_catalog_provider_from_pyobj(catalog_provider, self.codec.as_ref())?; - - let _ = self - .catalog_list - .register_catalog(name.to_owned(), provider); - - Ok(()) - } - - pub fn __repr__(&self) -> PyResult { - let mut names: Vec = self.catalog_names().into_iter().collect(); - names.sort(); - Ok(format!("CatalogList(catalog_names=[{}])", names.join(", "))) - } -} - -#[pymethods] -impl PyCatalog { - #[new] - pub fn new(py: Python, catalog: Py, session: Option>) -> PyResult { - let codec = extract_logical_extension_codec(py, session)?; - let catalog = Arc::new(RustWrappedPyCatalogProvider::new(catalog, codec.clone())) - as Arc; - Ok(Self { catalog, codec }) - } - - #[staticmethod] - pub fn memory_catalog(py: Python, session: Option>) -> PyResult { - let codec = 
extract_logical_extension_codec(py, session)?; - let catalog = Arc::new(MemoryCatalogProvider::default()) as Arc; - Ok(Self { catalog, codec }) - } - - pub fn schema_names(&self) -> HashSet { - self.catalog.schema_names().into_iter().collect() - } - - #[pyo3(signature = (name="public"))] - pub fn schema(&self, name: &str) -> PyResult> { - let schema = self - .catalog - .schema(name) - .ok_or(PyKeyError::new_err(format!( - "Schema with name {name} doesn't exist." - )))?; - - Python::attach(|py| { - match schema - .as_any() - .downcast_ref::() - { - Some(wrapped_schema) => Ok(wrapped_schema.schema_provider.clone_ref(py)), - None => PySchema::new_from_parts(schema, self.codec.clone()).into_py_any(py), - } - }) - } - - pub fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { - let provider = extract_schema_provider_from_pyobj(schema_provider, self.codec.as_ref())?; - - let _ = self - .catalog - .register_schema(name, provider) - .map_err(py_datafusion_err)?; - - Ok(()) - } - - pub fn deregister_schema(&self, name: &str, cascade: bool) -> PyResult<()> { - let _ = self - .catalog - .deregister_schema(name, cascade) - .map_err(py_datafusion_err)?; - - Ok(()) - } - - pub fn __repr__(&self) -> PyResult { - let mut names: Vec = self.schema_names().into_iter().collect(); - names.sort(); - Ok(format!("Catalog(schema_names=[{}])", names.join(", "))) - } -} - -#[pymethods] -impl PySchema { - #[new] - pub fn new( - py: Python, - schema_provider: Py, - session: Option>, - ) -> PyResult { - let codec = extract_logical_extension_codec(py, session)?; - let schema = - Arc::new(RustWrappedPySchemaProvider::new(schema_provider)) as Arc; - Ok(Self { schema, codec }) - } - - #[staticmethod] - fn memory_schema(py: Python, session: Option>) -> PyResult { - let codec = extract_logical_extension_codec(py, session)?; - let schema = Arc::new(MemorySchemaProvider::default()) as Arc; - Ok(Self { schema, codec }) - } - - #[getter] - fn table_names(&self) -> 
HashSet { - self.schema.table_names().into_iter().collect() - } - - fn table(&self, name: &str, py: Python) -> PyDataFusionResult { - if let Some(table) = wait_for_future(py, self.schema.table(name))?? { - Ok(PyTable::from(table)) - } else { - Err(PyDataFusionError::Common(format!( - "Table not found: {name}" - ))) - } - } - - fn __repr__(&self) -> PyResult { - let mut names: Vec = self.table_names().into_iter().collect(); - names.sort(); - Ok(format!("Schema(table_names=[{}])", names.join(";"))) - } - - fn register_table(&self, name: &str, table_provider: Bound<'_, PyAny>) -> PyResult<()> { - let py = table_provider.py(); - let codec_capsule = create_logical_extension_capsule(py, self.codec.as_ref())? - .as_any() - .clone(); - - let table = PyTable::new(table_provider, Some(codec_capsule))?; - - let _ = self - .schema - .register_table(name.to_string(), table.table) - .map_err(py_datafusion_err)?; - - Ok(()) - } - - fn deregister_table(&self, name: &str) -> PyResult<()> { - let _ = self - .schema - .deregister_table(name) - .map_err(py_datafusion_err)?; - - Ok(()) - } - - fn table_exist(&self, name: &str) -> bool { - self.schema.table_exist(name) - } -} - -#[derive(Debug)] -pub(crate) struct RustWrappedPySchemaProvider { - schema_provider: Py, - owner_name: Option, -} - -impl RustWrappedPySchemaProvider { - pub fn new(schema_provider: Py) -> Self { - let owner_name = Python::attach(|py| { - schema_provider - .bind(py) - .getattr("owner_name") - .ok() - .map(|name| name.to_string()) - }); - - Self { - schema_provider, - owner_name, - } - } - - fn table_inner(&self, name: &str) -> PyResult>> { - Python::attach(|py| { - let provider = self.schema_provider.bind(py); - let py_table_method = provider.getattr("table")?; - - let py_table = py_table_method.call((name,), None)?; - if py_table.is_none() { - return Ok(None); - } - - let table = PyTable::new(py_table, None)?; - - Ok(Some(table.table)) - }) - } -} - -#[async_trait] -impl SchemaProvider for 
RustWrappedPySchemaProvider { - fn owner_name(&self) -> Option<&str> { - self.owner_name.as_deref() - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn table_names(&self) -> Vec { - Python::attach(|py| { - let provider = self.schema_provider.bind(py); - - provider - .getattr("table_names") - .and_then(|names| names.extract::>()) - .unwrap_or_else(|err| { - log::error!("Unable to get table_names: {err}"); - Vec::default() - }) - }) - } - - async fn table( - &self, - name: &str, - ) -> datafusion::common::Result>, DataFusionError> { - self.table_inner(name) - .map_err(|e| DataFusionError::External(Box::new(e))) - } - - fn register_table( - &self, - name: String, - table: Arc, - ) -> datafusion::common::Result>> { - let py_table = PyTable::from(table); - Python::attach(|py| { - let provider = self.schema_provider.bind(py); - let _ = provider - .call_method1("register_table", (name, py_table)) - .map_err(to_datafusion_err)?; - // Since the definition of `register_table` says that an error - // will be returned if the table already exists, there is no - // case where we want to return a table provider as output. - Ok(None) - }) - } - - fn deregister_table( - &self, - name: &str, - ) -> datafusion::common::Result>> { - Python::attach(|py| { - let provider = self.schema_provider.bind(py); - let table = provider - .call_method1("deregister_table", (name,)) - .map_err(to_datafusion_err)?; - if table.is_none() { - return Ok(None); - } - - // If we can turn this table provider into a `Dataset`, return it. - // Otherwise, return None. 
- let dataset = match Dataset::new(&table, py) { - Ok(dataset) => Some(Arc::new(dataset) as Arc), - Err(_) => None, - }; - - Ok(dataset) - }) - } - - fn table_exist(&self, name: &str) -> bool { - Python::attach(|py| { - let provider = self.schema_provider.bind(py); - provider - .call_method1("table_exist", (name,)) - .and_then(|pyobj| pyobj.extract()) - .unwrap_or(false) - }) - } -} - -#[derive(Debug)] -pub(crate) struct RustWrappedPyCatalogProvider { - pub(crate) catalog_provider: Py, - codec: Arc, -} - -impl RustWrappedPyCatalogProvider { - pub fn new(catalog_provider: Py, codec: Arc) -> Self { - Self { - catalog_provider, - codec, - } - } - - fn schema_inner(&self, name: &str) -> PyResult>> { - Python::attach(|py| { - let provider = self.catalog_provider.bind(py); - - let py_schema = provider.call_method1("schema", (name,))?; - if py_schema.is_none() { - return Ok(None); - } - - extract_schema_provider_from_pyobj(py_schema, self.codec.as_ref()).map(Some) - }) - } -} - -#[async_trait] -impl CatalogProvider for RustWrappedPyCatalogProvider { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema_names(&self) -> Vec { - Python::attach(|py| { - let provider = self.catalog_provider.bind(py); - provider - .call_method0("schema_names") - .and_then(|names| names.extract::>()) - .map(|names| names.into_iter().collect()) - .unwrap_or_else(|err| { - log::error!("Unable to get schema_names: {err}"); - Vec::default() - }) - }) - } - - fn schema(&self, name: &str) -> Option> { - self.schema_inner(name).unwrap_or_else(|err| { - log::error!("CatalogProvider schema returned error: {err}"); - None - }) - } - - fn register_schema( - &self, - name: &str, - schema: Arc, - ) -> datafusion::common::Result>> { - Python::attach(|py| { - let py_schema = match schema - .as_any() - .downcast_ref::() - { - Some(wrapped_schema) => wrapped_schema.schema_provider.as_any(), - None => &PySchema::new_from_parts(schema, self.codec.clone()) - .into_py_any(py) - .map_err(to_datafusion_err)?, - }; 
- - let provider = self.catalog_provider.bind(py); - let schema = provider - .call_method1("register_schema", (name, py_schema)) - .map_err(to_datafusion_err)?; - if schema.is_none() { - return Ok(None); - } - - let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) - as Arc; - - Ok(Some(schema)) - }) - } - - fn deregister_schema( - &self, - name: &str, - cascade: bool, - ) -> datafusion::common::Result>> { - Python::attach(|py| { - let provider = self.catalog_provider.bind(py); - let schema = provider - .call_method1("deregister_schema", (name, cascade)) - .map_err(to_datafusion_err)?; - if schema.is_none() { - return Ok(None); - } - - let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) - as Arc; - - Ok(Some(schema)) - }) - } -} - -#[derive(Debug)] -pub(crate) struct RustWrappedPyCatalogProviderList { - pub(crate) catalog_provider_list: Py, - codec: Arc, -} - -impl RustWrappedPyCatalogProviderList { - pub fn new(catalog_provider_list: Py, codec: Arc) -> Self { - Self { - catalog_provider_list, - codec, - } - } - - fn catalog_inner(&self, name: &str) -> PyResult>> { - Python::attach(|py| { - let provider = self.catalog_provider_list.bind(py); - - let py_schema = provider.call_method1("catalog", (name,))?; - if py_schema.is_none() { - return Ok(None); - } - - extract_catalog_provider_from_pyobj(py_schema, self.codec.as_ref()).map(Some) - }) - } -} - -#[async_trait] -impl CatalogProviderList for RustWrappedPyCatalogProviderList { - fn as_any(&self) -> &dyn Any { - self - } - - fn catalog_names(&self) -> Vec { - Python::attach(|py| { - let provider = self.catalog_provider_list.bind(py); - provider - .call_method0("catalog_names") - .and_then(|names| names.extract::>()) - .map(|names| names.into_iter().collect()) - .unwrap_or_else(|err| { - log::error!("Unable to get catalog_names: {err}"); - Vec::default() - }) - }) - } - - fn catalog(&self, name: &str) -> Option> { - self.catalog_inner(name).unwrap_or_else(|err| { - 
log::error!("CatalogProvider catalog returned error: {err}"); - None - }) - } - - fn register_catalog( - &self, - name: String, - catalog: Arc, - ) -> Option> { - Python::attach(|py| { - let py_catalog = match catalog - .as_any() - .downcast_ref::() - { - Some(wrapped_schema) => wrapped_schema.catalog_provider.as_any().clone_ref(py), - None => { - match PyCatalog::new_from_parts(catalog, self.codec.clone()).into_py_any(py) { - Ok(c) => c, - Err(err) => { - log::error!( - "register_catalog returned error during conversion to PyAny: {err}" - ); - return None; - } - } - } - }; - - let provider = self.catalog_provider_list.bind(py); - let catalog = match provider.call_method1("register_catalog", (name, py_catalog)) { - Ok(c) => c, - Err(err) => { - log::error!("register_catalog returned error: {err}"); - return None; - } - }; - if catalog.is_none() { - return None; - } - - let catalog = Arc::new(RustWrappedPyCatalogProvider::new( - catalog.into(), - self.codec.clone(), - )) as Arc; - - Some(catalog) - }) - } -} - -fn extract_catalog_provider_from_pyobj( - mut catalog_provider: Bound, - codec: &FFI_LogicalExtensionCodec, -) -> PyResult> { - if catalog_provider.hasattr("__datafusion_catalog_provider__")? { - let py = catalog_provider.py(); - let codec_capsule = create_logical_extension_capsule(py, codec)?; - catalog_provider = catalog_provider - .getattr("__datafusion_catalog_provider__")? - .call1((codec_capsule,))?; - } - - let provider = if let Ok(capsule) = catalog_provider.cast::() { - validate_pycapsule(capsule, "datafusion_catalog_provider")?; - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_catalog_provider")))? 
- .cast(); - let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc - } else { - match catalog_provider.extract::() { - Ok(py_catalog) => py_catalog.catalog, - Err(_) => Arc::new(RustWrappedPyCatalogProvider::new( - catalog_provider.into(), - Arc::new(codec.clone()), - )) as Arc, - } - }; - - Ok(provider) -} - -fn extract_schema_provider_from_pyobj( - mut schema_provider: Bound, - codec: &FFI_LogicalExtensionCodec, -) -> PyResult> { - if schema_provider.hasattr("__datafusion_schema_provider__")? { - let py = schema_provider.py(); - let codec_capsule = create_logical_extension_capsule(py, codec)?; - schema_provider = schema_provider - .getattr("__datafusion_schema_provider__")? - .call1((codec_capsule,))?; - } - - let provider = if let Ok(capsule) = schema_provider.cast::() { - validate_pycapsule(capsule, "datafusion_schema_provider")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_schema_provider")))? - .cast(); - let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc - } else { - match schema_provider.extract::() { - Ok(py_schema) => py_schema.schema, - Err(_) => Arc::new(RustWrappedPySchemaProvider::new(schema_provider.into())) - as Arc, - } - }; - - Ok(provider) -} - -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - Ok(()) -} diff --git a/src/common.rs b/src/common.rs deleted file mode 100644 index 88d2fdd5f..000000000 --- a/src/common.rs +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use pyo3::prelude::*; - -pub mod data_type; -pub mod df_schema; -pub mod function; -pub mod schema; - -/// Initializes the `common` module to match the pattern of `datafusion-common` https://docs.rs/datafusion-common/18.0.0/datafusion_common/index.html -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} diff --git a/src/common/data_type.rs b/src/common/data_type.rs deleted file mode 100644 index af4179806..000000000 --- a/src/common/data_type.rs +++ /dev/null @@ -1,792 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use datafusion::arrow::array::Array; -use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; -use datafusion::common::ScalarValue; -use datafusion::logical_expr::expr::NullTreatment as DFNullTreatment; -use pyo3::exceptions::{PyNotImplementedError, PyValueError}; -use pyo3::prelude::*; - -/// A [`ScalarValue`] wrapped in a Python object. This struct allows for conversion -/// from a variety of Python objects into a [`ScalarValue`]. See -/// ``FromPyArrow::from_pyarrow_bound`` conversion details. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] -pub struct PyScalarValue(pub ScalarValue); - -impl From for PyScalarValue { - fn from(value: ScalarValue) -> Self { - Self(value) - } -} -impl From for ScalarValue { - fn from(value: PyScalarValue) -> Self { - value.0 - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "RexType", - module = "datafusion.common" -)] -pub enum RexType { - Alias, - Literal, - Call, - Reference, - ScalarSubquery, - Other, -} - -/// These bindings are tying together several disparate systems. -/// You have SQL types for the SQL strings and RDBMS systems itself. -/// Rust types for the DataFusion code -/// Arrow types which represents the underlying arrow format -/// Python types which represent the type in Python -/// It is important to keep all of those types in a single -/// and manageable location. Therefore this structure exists -/// to map those types and provide a simple place for developers -/// to map types from one system to another. 
-// TODO: This looks like this needs pyo3 tracking so leaving unfrozen for now -#[derive(Debug, Clone)] -#[pyclass( - from_py_object, - name = "DataTypeMap", - module = "datafusion.common", - subclass -)] -pub struct DataTypeMap { - #[pyo3(get, set)] - pub arrow_type: PyDataType, - #[pyo3(get, set)] - pub python_type: PythonType, - #[pyo3(get, set)] - pub sql_type: SqlType, -} - -impl DataTypeMap { - fn new(arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> Self { - DataTypeMap { - arrow_type: PyDataType { - data_type: arrow_type, - }, - python_type, - sql_type, - } - } - - pub fn map_from_arrow_type(arrow_type: &DataType) -> Result { - match arrow_type { - DataType::Null => Ok(DataTypeMap::new( - DataType::Null, - PythonType::None, - SqlType::NULL, - )), - DataType::Boolean => Ok(DataTypeMap::new( - DataType::Boolean, - PythonType::Bool, - SqlType::BOOLEAN, - )), - DataType::Int8 => Ok(DataTypeMap::new( - DataType::Int8, - PythonType::Int, - SqlType::TINYINT, - )), - DataType::Int16 => Ok(DataTypeMap::new( - DataType::Int16, - PythonType::Int, - SqlType::SMALLINT, - )), - DataType::Int32 => Ok(DataTypeMap::new( - DataType::Int32, - PythonType::Int, - SqlType::INTEGER, - )), - DataType::Int64 => Ok(DataTypeMap::new( - DataType::Int64, - PythonType::Int, - SqlType::BIGINT, - )), - DataType::UInt8 => Ok(DataTypeMap::new( - DataType::UInt8, - PythonType::Int, - SqlType::TINYINT, - )), - DataType::UInt16 => Ok(DataTypeMap::new( - DataType::UInt16, - PythonType::Int, - SqlType::SMALLINT, - )), - DataType::UInt32 => Ok(DataTypeMap::new( - DataType::UInt32, - PythonType::Int, - SqlType::INTEGER, - )), - DataType::UInt64 => Ok(DataTypeMap::new( - DataType::UInt64, - PythonType::Int, - SqlType::BIGINT, - )), - DataType::Float16 => Ok(DataTypeMap::new( - DataType::Float16, - PythonType::Float, - SqlType::FLOAT, - )), - DataType::Float32 => Ok(DataTypeMap::new( - DataType::Float32, - PythonType::Float, - SqlType::FLOAT, - )), - DataType::Float64 => 
Ok(DataTypeMap::new( - DataType::Float64, - PythonType::Float, - SqlType::FLOAT, - )), - DataType::Timestamp(unit, tz) => Ok(DataTypeMap::new( - DataType::Timestamp(*unit, tz.clone()), - PythonType::Datetime, - SqlType::DATE, - )), - DataType::Date32 => Ok(DataTypeMap::new( - DataType::Date32, - PythonType::Datetime, - SqlType::DATE, - )), - DataType::Date64 => Ok(DataTypeMap::new( - DataType::Date64, - PythonType::Datetime, - SqlType::DATE, - )), - DataType::Time32(unit) => Ok(DataTypeMap::new( - DataType::Time32(*unit), - PythonType::Datetime, - SqlType::DATE, - )), - DataType::Time64(unit) => Ok(DataTypeMap::new( - DataType::Time64(*unit), - PythonType::Datetime, - SqlType::DATE, - )), - DataType::Duration(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::Interval(interval_unit) => Ok(DataTypeMap::new( - DataType::Interval(*interval_unit), - PythonType::Datetime, - match interval_unit { - IntervalUnit::DayTime => SqlType::INTERVAL_DAY, - IntervalUnit::MonthDayNano => SqlType::INTERVAL_MONTH, - IntervalUnit::YearMonth => SqlType::INTERVAL_YEAR_MONTH, - }, - )), - DataType::Binary => Ok(DataTypeMap::new( - DataType::Binary, - PythonType::Bytes, - SqlType::BINARY, - )), - DataType::FixedSizeBinary(_) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - DataType::LargeBinary => Ok(DataTypeMap::new( - DataType::LargeBinary, - PythonType::Bytes, - SqlType::BINARY, - )), - DataType::Utf8 => Ok(DataTypeMap::new( - DataType::Utf8, - PythonType::Str, - SqlType::VARCHAR, - )), - DataType::LargeUtf8 => Ok(DataTypeMap::new( - DataType::LargeUtf8, - PythonType::Str, - SqlType::VARCHAR, - )), - DataType::List(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::FixedSizeList(_, _) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - DataType::LargeList(_) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - DataType::Struct(_) => 
Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::Union(_, _) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::Dictionary(_, _) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - DataType::Decimal32(precision, scale) => Ok(DataTypeMap::new( - DataType::Decimal32(*precision, *scale), - PythonType::Float, - SqlType::DECIMAL, - )), - DataType::Decimal64(precision, scale) => Ok(DataTypeMap::new( - DataType::Decimal64(*precision, *scale), - PythonType::Float, - SqlType::DECIMAL, - )), - DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new( - DataType::Decimal128(*precision, *scale), - PythonType::Float, - SqlType::DECIMAL, - )), - DataType::Decimal256(precision, scale) => Ok(DataTypeMap::new( - DataType::Decimal256(*precision, *scale), - PythonType::Float, - SqlType::DECIMAL, - )), - DataType::Map(_, _) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::RunEndEncoded(_, _) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - DataType::BinaryView => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::Utf8View => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::ListView(_) => Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))), - DataType::LargeListView(_) => { - Err(PyNotImplementedError::new_err(format!("{arrow_type:?}"))) - } - } - } - - /// Generate the `DataTypeMap` from a `ScalarValue` instance - pub fn map_from_scalar_value(scalar_val: &ScalarValue) -> Result { - DataTypeMap::map_from_arrow_type(&DataTypeMap::map_from_scalar_to_arrow(scalar_val)?) 
- } - - /// Maps a `ScalarValue` to an Arrow `DataType` - pub fn map_from_scalar_to_arrow(scalar_val: &ScalarValue) -> Result { - match scalar_val { - ScalarValue::Boolean(_) => Ok(DataType::Boolean), - ScalarValue::Float16(_) => Ok(DataType::Float16), - ScalarValue::Float32(_) => Ok(DataType::Float32), - ScalarValue::Float64(_) => Ok(DataType::Float64), - ScalarValue::Decimal32(_, precision, scale) => { - Ok(DataType::Decimal32(*precision, *scale)) - } - ScalarValue::Decimal64(_, precision, scale) => { - Ok(DataType::Decimal64(*precision, *scale)) - } - ScalarValue::Decimal128(_, precision, scale) => { - Ok(DataType::Decimal128(*precision, *scale)) - } - ScalarValue::Decimal256(_, precision, scale) => { - Ok(DataType::Decimal256(*precision, *scale)) - } - ScalarValue::Dictionary(data_type, scalar_type) => { - // Call this function again to map the dictionary scalar_value to an Arrow type - Ok(DataType::Dictionary( - Box::new(*data_type.clone()), - Box::new(DataTypeMap::map_from_scalar_to_arrow(scalar_type)?), - )) - } - ScalarValue::Int8(_) => Ok(DataType::Int8), - ScalarValue::Int16(_) => Ok(DataType::Int16), - ScalarValue::Int32(_) => Ok(DataType::Int32), - ScalarValue::Int64(_) => Ok(DataType::Int64), - ScalarValue::UInt8(_) => Ok(DataType::UInt8), - ScalarValue::UInt16(_) => Ok(DataType::UInt16), - ScalarValue::UInt32(_) => Ok(DataType::UInt32), - ScalarValue::UInt64(_) => Ok(DataType::UInt64), - ScalarValue::Utf8(_) => Ok(DataType::Utf8), - ScalarValue::LargeUtf8(_) => Ok(DataType::LargeUtf8), - ScalarValue::Binary(_) => Ok(DataType::Binary), - ScalarValue::LargeBinary(_) => Ok(DataType::LargeBinary), - ScalarValue::Date32(_) => Ok(DataType::Date32), - ScalarValue::Date64(_) => Ok(DataType::Date64), - ScalarValue::Time32Second(_) => Ok(DataType::Time32(TimeUnit::Second)), - ScalarValue::Time32Millisecond(_) => Ok(DataType::Time32(TimeUnit::Millisecond)), - ScalarValue::Time64Microsecond(_) => Ok(DataType::Time64(TimeUnit::Microsecond)), - 
ScalarValue::Time64Nanosecond(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)), - ScalarValue::Null => Ok(DataType::Null), - ScalarValue::TimestampSecond(_, tz) => { - Ok(DataType::Timestamp(TimeUnit::Second, tz.to_owned())) - } - ScalarValue::TimestampMillisecond(_, tz) => { - Ok(DataType::Timestamp(TimeUnit::Millisecond, tz.to_owned())) - } - ScalarValue::TimestampMicrosecond(_, tz) => { - Ok(DataType::Timestamp(TimeUnit::Microsecond, tz.to_owned())) - } - ScalarValue::TimestampNanosecond(_, tz) => { - Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz.to_owned())) - } - ScalarValue::IntervalYearMonth(..) => Ok(DataType::Interval(IntervalUnit::YearMonth)), - ScalarValue::IntervalDayTime(..) => Ok(DataType::Interval(IntervalUnit::DayTime)), - ScalarValue::IntervalMonthDayNano(..) => { - Ok(DataType::Interval(IntervalUnit::MonthDayNano)) - } - ScalarValue::List(arr) => Ok(arr.data_type().to_owned()), - ScalarValue::Struct(_fields) => Err(PyNotImplementedError::new_err( - "ScalarValue::Struct".to_string(), - )), - ScalarValue::FixedSizeBinary(size, _) => Ok(DataType::FixedSizeBinary(*size)), - ScalarValue::FixedSizeList(_array_ref) => { - // The FieldRef was removed from ScalarValue::FixedSizeList in - // https://github.com/apache/arrow-datafusion/pull/8221, so we can no - // longer convert back to a DataType here - Err(PyNotImplementedError::new_err( - "ScalarValue::FixedSizeList".to_string(), - )) - } - ScalarValue::LargeList(_) => Err(PyNotImplementedError::new_err( - "ScalarValue::LargeList".to_string(), - )), - ScalarValue::DurationSecond(_) => Ok(DataType::Duration(TimeUnit::Second)), - ScalarValue::DurationMillisecond(_) => Ok(DataType::Duration(TimeUnit::Millisecond)), - ScalarValue::DurationMicrosecond(_) => Ok(DataType::Duration(TimeUnit::Microsecond)), - ScalarValue::DurationNanosecond(_) => Ok(DataType::Duration(TimeUnit::Nanosecond)), - ScalarValue::Union(_, _, _) => Err(PyNotImplementedError::new_err( - "ScalarValue::LargeList".to_string(), - )), - 
ScalarValue::Utf8View(_) => Ok(DataType::Utf8View), - ScalarValue::BinaryView(_) => Ok(DataType::BinaryView), - ScalarValue::Map(_) => Err(PyNotImplementedError::new_err( - "ScalarValue::Map".to_string(), - )), - ScalarValue::RunEndEncoded(field1, field2, _) => Ok(DataType::RunEndEncoded( - Arc::clone(field1), - Arc::clone(field2), - )), - } - } -} - -#[pymethods] -impl DataTypeMap { - #[new] - pub fn py_new(arrow_type: PyDataType, python_type: PythonType, sql_type: SqlType) -> Self { - DataTypeMap { - arrow_type, - python_type, - sql_type, - } - } - - #[staticmethod] - #[pyo3(name = "from_parquet_type_str")] - /// When using pyarrow.parquet.read_metadata().schema.column(x).physical_type you are presented - /// with a String type for schema rather than an object type. Here we make a best effort - /// to convert that to a physical type. - pub fn py_map_from_parquet_type_str(parquet_str_type: String) -> PyResult { - let arrow_dtype = match parquet_str_type.to_lowercase().as_str() { - "boolean" => Ok(DataType::Boolean), - "int32" => Ok(DataType::Int32), - "int64" => Ok(DataType::Int64), - "int96" => { - // Int96 is an old parquet datatype that is now deprecated. We convert to nanosecond timestamp - Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) - } - "float" => Ok(DataType::Float32), - "double" => Ok(DataType::Float64), - "byte_array" => Ok(DataType::Utf8), - _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Parquet String type: {parquet_str_type:?}" - ))), - }; - DataTypeMap::map_from_arrow_type(&arrow_dtype?) 
- } - - #[staticmethod] - #[pyo3(name = "arrow")] - pub fn py_map_from_arrow_type(arrow_type: &PyDataType) -> PyResult { - DataTypeMap::map_from_arrow_type(&arrow_type.data_type) - } - - #[staticmethod] - #[pyo3(name = "arrow_str")] - pub fn py_map_from_arrow_type_str(arrow_type_str: String) -> PyResult { - let data_type = PyDataType::py_map_from_arrow_type_str(arrow_type_str); - DataTypeMap::map_from_arrow_type(&data_type?.data_type) - } - - #[staticmethod] - #[pyo3(name = "sql")] - pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult { - match sql_type { - SqlType::ANY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::ARRAY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::BIGINT => Ok(DataTypeMap::new( - DataType::Int64, - PythonType::Int, - SqlType::BIGINT, - )), - SqlType::BINARY => Ok(DataTypeMap::new( - DataType::Binary, - PythonType::Bytes, - SqlType::BINARY, - )), - SqlType::BOOLEAN => Ok(DataTypeMap::new( - DataType::Boolean, - PythonType::Bool, - SqlType::BOOLEAN, - )), - SqlType::CHAR => Ok(DataTypeMap::new( - DataType::UInt8, - PythonType::Int, - SqlType::CHAR, - )), - SqlType::COLUMN_LIST => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::CURSOR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::DATE => Ok(DataTypeMap::new( - DataType::Date64, - PythonType::Datetime, - SqlType::DATE, - )), - SqlType::DECIMAL => Ok(DataTypeMap::new( - DataType::Decimal128(1, 1), - PythonType::Float, - SqlType::DECIMAL, - )), - SqlType::DISTINCT => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::DOUBLE => Ok(DataTypeMap::new( - DataType::Decimal256(1, 1), - PythonType::Float, - SqlType::DOUBLE, - )), - SqlType::DYNAMIC_STAR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::FLOAT => Ok(DataTypeMap::new( - DataType::Decimal128(1, 1), - PythonType::Float, - SqlType::FLOAT, - )), - SqlType::GEOMETRY => 
Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTEGER => Ok(DataTypeMap::new( - DataType::Int8, - PythonType::Int, - SqlType::INTEGER, - )), - SqlType::INTERVAL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTERVAL_DAY => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTERVAL_DAY_HOUR => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_DAY_MINUTE => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_DAY_SECOND => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_HOUR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTERVAL_HOUR_MINUTE => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_HOUR_SECOND => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_MINUTE => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_MINUTE_SECOND => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_MONTH => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTERVAL_SECOND => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::INTERVAL_YEAR => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::INTERVAL_YEAR_MONTH => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::MAP => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::MULTISET => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::NULL => Ok(DataTypeMap::new( - DataType::Null, - PythonType::None, - SqlType::NULL, - )), - SqlType::OTHER => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::REAL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::ROW => 
Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::SARG => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::SMALLINT => Ok(DataTypeMap::new( - DataType::Int16, - PythonType::Int, - SqlType::SMALLINT, - )), - SqlType::STRUCTURED => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::SYMBOL => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::TIME => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::TIME_WITH_LOCAL_TIME_ZONE => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::TIMESTAMP => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => { - Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))) - } - SqlType::TINYINT => Ok(DataTypeMap::new( - DataType::Int8, - PythonType::Int, - SqlType::TINYINT, - )), - SqlType::UNKNOWN => Err(PyNotImplementedError::new_err(format!("{sql_type:?}"))), - SqlType::VARBINARY => Ok(DataTypeMap::new( - DataType::LargeBinary, - PythonType::Bytes, - SqlType::VARBINARY, - )), - SqlType::VARCHAR => Ok(DataTypeMap::new( - DataType::Utf8, - PythonType::Str, - SqlType::VARCHAR, - )), - } - } - - /// Unfortunately PyO3 does not allow for us to expose the DataType as an enum since - /// we cannot directly annotate the Enum instance of dependency code. Therefore, here - /// we provide an enum to mimic it. 
- #[pyo3(name = "friendly_arrow_type_name")] - pub fn friendly_arrow_type_name(&self) -> PyResult<&str> { - Ok(match &self.arrow_type.data_type { - DataType::Null => "Null", - DataType::Boolean => "Boolean", - DataType::Int8 => "Int8", - DataType::Int16 => "Int16", - DataType::Int32 => "Int32", - DataType::Int64 => "Int64", - DataType::UInt8 => "UInt8", - DataType::UInt16 => "UInt16", - DataType::UInt32 => "UInt32", - DataType::UInt64 => "UInt64", - DataType::Float16 => "Float16", - DataType::Float32 => "Float32", - DataType::Float64 => "Float64", - DataType::Timestamp(_, _) => "Timestamp", - DataType::Date32 => "Date32", - DataType::Date64 => "Date64", - DataType::Time32(_) => "Time32", - DataType::Time64(_) => "Time64", - DataType::Duration(_) => "Duration", - DataType::Interval(_) => "Interval", - DataType::Binary => "Binary", - DataType::FixedSizeBinary(_) => "FixedSizeBinary", - DataType::LargeBinary => "LargeBinary", - DataType::Utf8 => "Utf8", - DataType::LargeUtf8 => "LargeUtf8", - DataType::List(_) => "List", - DataType::FixedSizeList(_, _) => "FixedSizeList", - DataType::LargeList(_) => "LargeList", - DataType::Struct(_) => "Struct", - DataType::Union(_, _) => "Union", - DataType::Dictionary(_, _) => "Dictionary", - DataType::Decimal32(_, _) => "Decimal32", - DataType::Decimal64(_, _) => "Decimal64", - DataType::Decimal128(_, _) => "Decimal128", - DataType::Decimal256(_, _) => "Decimal256", - DataType::Map(_, _) => "Map", - DataType::RunEndEncoded(_, _) => "RunEndEncoded", - DataType::BinaryView => "BinaryView", - DataType::Utf8View => "Utf8View", - DataType::ListView(_) => "ListView", - DataType::LargeListView(_) => "LargeListView", - }) - } -} - -/// PyO3 requires that objects passed between Rust and Python implement the trait `PyClass` -/// Since `DataType` exists in another package we cannot make that happen here so we wrap -/// `DataType` as `PyDataType` This exists solely to satisfy those constraints. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - name = "DataType", - module = "datafusion.common" -)] -pub struct PyDataType { - pub data_type: DataType, -} - -impl PyDataType { - /// There are situations when obtaining dtypes on the Python side where the Arrow type - /// is presented as a String rather than an actual DataType. This function is used to - /// convert that String to a DataType for the Python side to use. - pub fn py_map_from_arrow_type_str(arrow_str_type: String) -> PyResult { - // Certain string types contain "metadata" that should be trimmed here. Ex: "datetime64[ns, Europe/Berlin]" - let arrow_str_type = match arrow_str_type.find('[') { - Some(index) => arrow_str_type[0..index].to_string(), - None => arrow_str_type, // Return early if ',' is not found. - }; - - let arrow_dtype = match arrow_str_type.to_lowercase().as_str() { - "bool" => Ok(DataType::Boolean), - "boolean" => Ok(DataType::Boolean), - "uint8" => Ok(DataType::UInt8), - "uint16" => Ok(DataType::UInt16), - "uint32" => Ok(DataType::UInt32), - "uint64" => Ok(DataType::UInt64), - "int8" => Ok(DataType::Int8), - "int16" => Ok(DataType::Int16), - "int32" => Ok(DataType::Int32), - "int64" => Ok(DataType::Int64), - "float" => Ok(DataType::Float32), - "double" => Ok(DataType::Float64), - "float16" => Ok(DataType::Float16), - "float32" => Ok(DataType::Float32), - "float64" => Ok(DataType::Float64), - "datetime64" => Ok(DataType::Date64), - "object" => Ok(DataType::Utf8), - _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Arrow String type: {arrow_str_type:?}" - ))), - }; - Ok(PyDataType { - data_type: arrow_dtype?, - }) - } -} - -impl From for DataType { - fn from(data_type: PyDataType) -> DataType { - data_type.data_type - } -} - -impl From for PyDataType { - fn from(data_type: DataType) -> PyDataType { - PyDataType { data_type } - } -} - -/// Represents the possible Python types that can be mapped to 
the SQL types -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "PythonType", - module = "datafusion.common" -)] -pub enum PythonType { - Array, - Bool, - Bytes, - Datetime, - Float, - Int, - List, - None, - Object, - Str, -} - -/// Represents the types that are possible for DataFusion to parse -/// from a SQL query. Aka "SqlType" and are valid values for -/// ANSI SQL -#[allow(non_camel_case_types)] -#[allow(clippy::upper_case_acronyms)] -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "SqlType", - module = "datafusion.common" -)] -pub enum SqlType { - ANY, - ARRAY, - BIGINT, - BINARY, - BOOLEAN, - CHAR, - COLUMN_LIST, - CURSOR, - DATE, - DECIMAL, - DISTINCT, - DOUBLE, - DYNAMIC_STAR, - FLOAT, - GEOMETRY, - INTEGER, - INTERVAL, - INTERVAL_DAY, - INTERVAL_DAY_HOUR, - INTERVAL_DAY_MINUTE, - INTERVAL_DAY_SECOND, - INTERVAL_HOUR, - INTERVAL_HOUR_MINUTE, - INTERVAL_HOUR_SECOND, - INTERVAL_MINUTE, - INTERVAL_MINUTE_SECOND, - INTERVAL_MONTH, - INTERVAL_SECOND, - INTERVAL_YEAR, - INTERVAL_YEAR_MONTH, - MAP, - MULTISET, - NULL, - OTHER, - REAL, - ROW, - SARG, - SMALLINT, - STRUCTURED, - SYMBOL, - TIME, - TIME_WITH_LOCAL_TIME_ZONE, - TIMESTAMP, - TIMESTAMP_WITH_LOCAL_TIME_ZONE, - TINYINT, - UNKNOWN, - VARBINARY, - VARCHAR, -} - -/// Specifies Ignore / Respect NULL within window functions. 
-/// For example -/// `FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1)` -#[allow(non_camel_case_types)] -#[allow(clippy::upper_case_acronyms)] -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "NullTreatment", - module = "datafusion.common" -)] -pub enum NullTreatment { - IGNORE_NULLS, - RESPECT_NULLS, -} - -impl From for DFNullTreatment { - fn from(null_treatment: NullTreatment) -> DFNullTreatment { - match null_treatment { - NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls, - NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls, - } - } -} - -impl From for NullTreatment { - fn from(null_treatment: DFNullTreatment) -> NullTreatment { - match null_treatment { - DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS, - DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS, - } - } -} diff --git a/src/common/df_schema.rs b/src/common/df_schema.rs deleted file mode 100644 index 9167e772e..000000000 --- a/src/common/df_schema.rs +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion::common::DFSchema; -use pyo3::prelude::*; - -#[derive(Debug, Clone)] -#[pyclass( - from_py_object, - frozen, - name = "DFSchema", - module = "datafusion.common", - subclass -)] -pub struct PyDFSchema { - schema: Arc, -} - -impl From for DFSchema { - fn from(schema: PyDFSchema) -> DFSchema { - (*schema.schema).clone() - } -} - -impl From for PyDFSchema { - fn from(schema: DFSchema) -> PyDFSchema { - PyDFSchema { - schema: Arc::new(schema), - } - } -} - -#[pymethods] -impl PyDFSchema { - #[pyo3(name = "empty")] - #[staticmethod] - fn py_empty() -> PyResult { - Ok(Self { - schema: Arc::new(DFSchema::empty()), - }) - } - - #[pyo3(name = "field_names")] - fn py_field_names(&self) -> PyResult> { - Ok(self.schema.field_names()) - } -} diff --git a/src/common/function.rs b/src/common/function.rs deleted file mode 100644 index 41cab515f..000000000 --- a/src/common/function.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; - -use datafusion::arrow::datatypes::DataType; -use pyo3::prelude::*; - -use super::data_type::PyDataType; - -#[pyclass( - from_py_object, - frozen, - name = "SqlFunction", - module = "datafusion.common", - subclass -)] -#[derive(Debug, Clone)] -pub struct SqlFunction { - pub name: String, - pub return_types: HashMap, DataType>, - pub aggregation: bool, -} - -impl SqlFunction { - pub fn new( - function_name: String, - input_types: Vec, - return_type: PyDataType, - aggregation_bool: bool, - ) -> Self { - let mut func = Self { - name: function_name, - return_types: HashMap::new(), - aggregation: aggregation_bool, - }; - func.add_type_mapping(input_types, return_type); - func - } - - pub fn add_type_mapping(&mut self, input_types: Vec, return_type: PyDataType) { - self.return_types.insert( - input_types.iter().map(|t| t.clone().into()).collect(), - return_type.into(), - ); - } -} diff --git a/src/common/schema.rs b/src/common/schema.rs deleted file mode 100644 index 29a27b204..000000000 --- a/src/common/schema.rs +++ /dev/null @@ -1,389 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::borrow::Cow; -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use arrow::datatypes::Schema; -use arrow::pyarrow::PyArrowType; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::common::Constraints; -use datafusion::datasource::TableType; -use datafusion::logical_expr::utils::split_conjunction; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableSource}; -use parking_lot::RwLock; -use pyo3::prelude::*; - -use super::data_type::DataTypeMap; -use super::function::SqlFunction; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - name = "SqlSchema", - module = "datafusion.common", - subclass, - frozen -)] -#[derive(Debug, Clone)] -pub struct SqlSchema { - name: Arc>, - tables: Arc>>, - views: Arc>>, - functions: Arc>>, -} - -#[pyclass( - from_py_object, - name = "SqlTable", - module = "datafusion.common", - subclass -)] -#[derive(Debug, Clone)] -pub struct SqlTable { - #[pyo3(get, set)] - pub name: String, - #[pyo3(get, set)] - pub columns: Vec<(String, DataTypeMap)>, - #[pyo3(get, set)] - pub primary_key: Option, - #[pyo3(get, set)] - pub foreign_keys: Vec, - #[pyo3(get, set)] - pub indexes: Vec, - #[pyo3(get, set)] - pub constraints: Vec, - #[pyo3(get, set)] - pub statistics: SqlStatistics, - #[pyo3(get, set)] - pub filepaths: Option>, -} - -#[pymethods] -impl SqlTable { - #[new] - #[pyo3(signature = (table_name, columns, row_count, filepaths=None))] - pub fn new( - table_name: String, - columns: Vec<(String, DataTypeMap)>, - row_count: f64, - filepaths: Option>, - ) -> Self { - Self { - name: table_name, - columns, - primary_key: None, - foreign_keys: Vec::new(), - indexes: Vec::new(), - constraints: Vec::new(), - statistics: SqlStatistics::new(row_count), - filepaths, - } - } -} - -#[pyclass( - from_py_object, - name = "SqlView", - module = "datafusion.common", - subclass -)] -#[derive(Debug, Clone)] -pub struct SqlView { - #[pyo3(get, set)] - pub name: String, - 
#[pyo3(get, set)] - pub definition: String, // SQL code that defines the view -} - -#[pymethods] -impl SqlSchema { - #[new] - pub fn new(schema_name: &str) -> Self { - Self { - name: Arc::new(RwLock::new(schema_name.to_owned())), - tables: Arc::new(RwLock::new(Vec::new())), - views: Arc::new(RwLock::new(Vec::new())), - functions: Arc::new(RwLock::new(Vec::new())), - } - } - - #[getter] - fn name(&self) -> PyResult { - Ok(self.name.read().clone()) - } - - #[setter] - fn set_name(&self, value: String) -> PyResult<()> { - *self.name.write() = value; - Ok(()) - } - - #[getter] - fn tables(&self) -> PyResult> { - Ok(self.tables.read().clone()) - } - - #[setter] - fn set_tables(&self, tables: Vec) -> PyResult<()> { - *self.tables.write() = tables; - Ok(()) - } - - #[getter] - fn views(&self) -> PyResult> { - Ok(self.views.read().clone()) - } - - #[setter] - fn set_views(&self, views: Vec) -> PyResult<()> { - *self.views.write() = views; - Ok(()) - } - - #[getter] - fn functions(&self) -> PyResult> { - Ok(self.functions.read().clone()) - } - - #[setter] - fn set_functions(&self, functions: Vec) -> PyResult<()> { - *self.functions.write() = functions; - Ok(()) - } - - pub fn table_by_name(&self, table_name: &str) -> Option { - let tables = self.tables.read(); - tables.iter().find(|tbl| tbl.name.eq(table_name)).cloned() - } - - pub fn add_table(&self, table: SqlTable) { - let mut tables = self.tables.write(); - tables.push(table); - } - - pub fn drop_table(&self, table_name: String) { - let mut tables = self.tables.write(); - tables.retain(|x| !x.name.eq(&table_name)); - } -} - -/// SqlTable wrapper that is compatible with DataFusion logical query plans -pub struct SqlTableSource { - schema: SchemaRef, - statistics: Option, - filepaths: Option>, -} - -impl SqlTableSource { - /// Initialize a new `EmptyTable` from a schema - pub fn new( - schema: SchemaRef, - statistics: Option, - filepaths: Option>, - ) -> Self { - Self { - schema, - statistics, - filepaths, - } - } - - /// 
Access optional statistics associated with this table source - pub fn statistics(&self) -> Option<&SqlStatistics> { - self.statistics.as_ref() - } - - /// Access optional filepath associated with this table source - #[allow(dead_code)] - pub fn filepaths(&self) -> Option<&Vec> { - self.filepaths.as_ref() - } -} - -/// Implement TableSource, used in the logical query plan and in logical query optimizations -impl TableSource for SqlTableSource { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn table_type(&self) -> datafusion::logical_expr::TableType { - datafusion::logical_expr::TableType::Base - } - - fn supports_filters_pushdown( - &self, - filters: &[&Expr], - ) -> datafusion::common::Result> { - filters - .iter() - .map(|f| { - let filters = split_conjunction(f); - if filters.iter().all(|f| is_supported_push_down_expr(f)) { - // Push down filters to the tablescan operation if all are supported - Ok(TableProviderFilterPushDown::Exact) - } else if filters.iter().any(|f| is_supported_push_down_expr(f)) { - // Partially apply the filter in the TableScan but retain - // the Filter operator in the plan as well - Ok(TableProviderFilterPushDown::Inexact) - } else { - Ok(TableProviderFilterPushDown::Unsupported) - } - }) - .collect() - } - - fn get_logical_plan(&self) -> Option> { - None - } -} - -fn is_supported_push_down_expr(_expr: &Expr) -> bool { - // For now we support all kinds of expr's at this level - true -} - -#[pyclass( - from_py_object, - frozen, - name = "SqlStatistics", - module = "datafusion.common", - subclass -)] -#[derive(Debug, Clone)] -pub struct SqlStatistics { - row_count: f64, -} - -#[pymethods] -impl SqlStatistics { - #[new] - pub fn new(row_count: f64) -> Self { - Self { row_count } - } - - #[pyo3(name = "getRowCount")] - pub fn get_row_count(&self) -> f64 { - self.row_count - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Constraints", - module = "datafusion.expr", - 
subclass -)] -#[derive(Clone)] -pub struct PyConstraints { - pub constraints: Constraints, -} - -impl From for Constraints { - fn from(constraints: PyConstraints) -> Self { - constraints.constraints - } -} - -impl From for PyConstraints { - fn from(constraints: Constraints) -> Self { - PyConstraints { constraints } - } -} - -impl Display for PyConstraints { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Constraints: {:?}", self.constraints) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "TableType", - module = "datafusion.common" -)] -pub enum PyTableType { - Base, - View, - Temporary, -} - -impl From for datafusion::logical_expr::TableType { - fn from(table_type: PyTableType) -> Self { - match table_type { - PyTableType::Base => datafusion::logical_expr::TableType::Base, - PyTableType::View => datafusion::logical_expr::TableType::View, - PyTableType::Temporary => datafusion::logical_expr::TableType::Temporary, - } - } -} - -impl From for PyTableType { - fn from(table_type: TableType) -> Self { - match table_type { - datafusion::logical_expr::TableType::Base => PyTableType::Base, - datafusion::logical_expr::TableType::View => PyTableType::View, - datafusion::logical_expr::TableType::Temporary => PyTableType::Temporary, - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "TableSource", - module = "datafusion.common", - subclass -)] -#[derive(Clone)] -pub struct PyTableSource { - pub table_source: Arc, -} - -#[pymethods] -impl PyTableSource { - pub fn schema(&self) -> PyArrowType { - (*self.table_source.schema()).clone().into() - } - - pub fn constraints(&self) -> Option { - self.table_source.constraints().map(|c| PyConstraints { - constraints: c.clone(), - }) - } - - pub fn table_type(&self) -> PyTableType { - self.table_source.table_type().into() - } - - pub fn get_logical_plan(&self) -> Option { - self.table_source - .get_logical_plan() - 
.map(|plan| PyLogicalPlan::new(plan.into_owned())) - } -} diff --git a/src/config.rs b/src/config.rs deleted file mode 100644 index fdb693a12..000000000 --- a/src/config.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion::config::ConfigOptions; -use parking_lot::RwLock; -use pyo3::prelude::*; -use pyo3::types::*; - -use crate::common::data_type::PyScalarValue; -use crate::errors::PyDataFusionResult; -#[pyclass( - from_py_object, - name = "Config", - module = "datafusion", - subclass, - frozen -)] -#[derive(Clone)] -pub(crate) struct PyConfig { - config: Arc>, -} - -#[pymethods] -impl PyConfig { - #[new] - fn py_new() -> Self { - Self { - config: Arc::new(RwLock::new(ConfigOptions::new())), - } - } - - /// Get configurations from environment variables - #[staticmethod] - pub fn from_env() -> PyDataFusionResult { - Ok(Self { - config: Arc::new(RwLock::new(ConfigOptions::from_env()?)), - }) - } - - /// Get a configuration option - pub fn get<'py>(&self, key: &str, py: Python<'py>) -> PyResult> { - let value: Option> = { - let options = self.config.read(); - options - .entries() - .into_iter() - .find_map(|entry| (entry.key == key).then_some(entry.value.clone())) - }; - - match value { - Some(value) => Ok(value.into_pyobject(py)?), - None => Ok(None::.into_pyobject(py)?), - } - } - - /// Set a configuration option - pub fn set(&self, key: &str, value: Py, py: Python) -> PyDataFusionResult<()> { - let scalar_value: PyScalarValue = value.extract(py)?; - let mut options = self.config.write(); - options.set(key, scalar_value.0.to_string().as_str())?; - Ok(()) - } - - /// Get all configuration options - pub fn get_all(&self, py: Python) -> PyResult> { - let entries: Vec<(String, Option)> = { - let options = self.config.read(); - options - .entries() - .into_iter() - .map(|entry| (entry.key.clone(), entry.value.clone())) - .collect() - }; - - let dict = PyDict::new(py); - for (key, value) in entries { - dict.set_item(key, value.into_pyobject(py)?)?; - } - Ok(dict.into()) - } - - fn __repr__(&self, py: Python) -> PyResult { - match self.get_all(py) { - Ok(result) => Ok(format!("Config({result})")), - Err(err) => Ok(format!("Error: {:?}", 
err.to_string())), - } - } -} diff --git a/src/context.rs b/src/context.rs deleted file mode 100644 index 2eaf5a737..000000000 --- a/src/context.rs +++ /dev/null @@ -1,1281 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; -use std::ptr::NonNull; -use std::str::FromStr; -use std::sync::Arc; - -use arrow::array::RecordBatchReader; -use arrow::ffi_stream::ArrowArrayStreamReader; -use arrow::pyarrow::FromPyArrow; -use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; -use datafusion::arrow::pyarrow::PyArrowType; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::{CatalogProvider, CatalogProviderList}; -use datafusion::common::{ScalarValue, TableReference, exec_err}; -use datafusion::datasource::file_format::file_compression_type::FileCompressionType; -use datafusion::datasource::file_format::parquet::ParquetFormat; -use datafusion::datasource::listing::{ - ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, -}; -use datafusion::datasource::{MemTable, TableProvider}; -use datafusion::execution::TaskContextProvider; -use datafusion::execution::context::{ - DataFilePaths, SQLOptions, 
SessionConfig, SessionContext, TaskContext, -}; -use datafusion::execution::disk_manager::DiskManagerMode; -use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; -use datafusion::execution::options::ReadOptions; -use datafusion::execution::runtime_env::RuntimeEnvBuilder; -use datafusion::execution::session_state::SessionStateBuilder; -use datafusion::prelude::{ - AvroReadOptions, CsvReadOptions, DataFrame, JsonReadOptions, ParquetReadOptions, -}; -use datafusion_ffi::catalog_provider::FFI_CatalogProvider; -use datafusion_ffi::catalog_provider_list::FFI_CatalogProviderList; -use datafusion_ffi::execution::FFI_TaskContextProvider; -use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; -use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec; -use object_store::ObjectStore; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::{PyKeyError, PyValueError}; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; -use url::Url; -use uuid::Uuid; - -use crate::catalog::{ - PyCatalog, PyCatalogList, RustWrappedPyCatalogProvider, RustWrappedPyCatalogProviderList, -}; -use crate::common::data_type::PyScalarValue; -use crate::dataframe::PyDataFrame; -use crate::dataset::Dataset; -use crate::errors::{ - PyDataFusionError, PyDataFusionResult, from_datafusion_error, py_datafusion_err, -}; -use crate::expr::sort_expr::PySortExpr; -use crate::options::PyCsvReadOptions; -use crate::physical_plan::PyExecutionPlan; -use crate::record_batch::PyRecordBatchStream; -use crate::sql::logical::PyLogicalPlan; -use crate::sql::util::replace_placeholders_with_strings; -use crate::store::StorageContexts; -use crate::table::PyTable; -use crate::udaf::PyAggregateUDF; -use crate::udf::PyScalarUDF; -use crate::udtf::PyTableFunction; -use crate::udwf::PyWindowUDF; -use crate::utils::{ - create_logical_extension_capsule, extract_logical_extension_codec, get_global_ctx, - 
get_tokio_runtime, spawn_future, validate_pycapsule, wait_for_future, -}; - -/// Configuration options for a SessionContext -#[pyclass( - from_py_object, - frozen, - name = "SessionConfig", - module = "datafusion", - subclass -)] -#[derive(Clone, Default)] -pub struct PySessionConfig { - pub config: SessionConfig, -} - -impl From for PySessionConfig { - fn from(config: SessionConfig) -> Self { - Self { config } - } -} - -#[pymethods] -impl PySessionConfig { - #[pyo3(signature = (config_options=None))] - #[new] - fn new(config_options: Option>) -> Self { - let mut config = SessionConfig::new(); - if let Some(hash_map) = config_options { - for (k, v) in &hash_map { - config = config.set(k, &ScalarValue::Utf8(Some(v.clone()))); - } - } - - Self { config } - } - - fn with_create_default_catalog_and_schema(&self, enabled: bool) -> Self { - Self::from( - self.config - .clone() - .with_create_default_catalog_and_schema(enabled), - ) - } - - fn with_default_catalog_and_schema(&self, catalog: &str, schema: &str) -> Self { - Self::from( - self.config - .clone() - .with_default_catalog_and_schema(catalog, schema), - ) - } - - fn with_information_schema(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_information_schema(enabled)) - } - - fn with_batch_size(&self, batch_size: usize) -> Self { - Self::from(self.config.clone().with_batch_size(batch_size)) - } - - fn with_target_partitions(&self, target_partitions: usize) -> Self { - Self::from( - self.config - .clone() - .with_target_partitions(target_partitions), - ) - } - - fn with_repartition_aggregations(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_repartition_aggregations(enabled)) - } - - fn with_repartition_joins(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_repartition_joins(enabled)) - } - - fn with_repartition_windows(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_repartition_windows(enabled)) - } - - fn 
with_repartition_sorts(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_repartition_sorts(enabled)) - } - - fn with_repartition_file_scans(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_repartition_file_scans(enabled)) - } - - fn with_repartition_file_min_size(&self, size: usize) -> Self { - Self::from(self.config.clone().with_repartition_file_min_size(size)) - } - - fn with_parquet_pruning(&self, enabled: bool) -> Self { - Self::from(self.config.clone().with_parquet_pruning(enabled)) - } - - fn set(&self, key: &str, value: &str) -> Self { - Self::from(self.config.clone().set_str(key, value)) - } -} - -/// Runtime options for a SessionContext -#[pyclass( - from_py_object, - frozen, - name = "RuntimeEnvBuilder", - module = "datafusion", - subclass -)] -#[derive(Clone)] -pub struct PyRuntimeEnvBuilder { - pub builder: RuntimeEnvBuilder, -} - -#[pymethods] -impl PyRuntimeEnvBuilder { - #[new] - fn new() -> Self { - Self { - builder: RuntimeEnvBuilder::default(), - } - } - - fn with_disk_manager_disabled(&self) -> Self { - let mut runtime_builder = self.builder.clone(); - - let mut disk_mgr_builder = runtime_builder - .disk_manager_builder - .clone() - .unwrap_or_default(); - disk_mgr_builder.set_mode(DiskManagerMode::Disabled); - - runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); - Self { - builder: runtime_builder, - } - } - - fn with_disk_manager_os(&self) -> Self { - let mut runtime_builder = self.builder.clone(); - - let mut disk_mgr_builder = runtime_builder - .disk_manager_builder - .clone() - .unwrap_or_default(); - disk_mgr_builder.set_mode(DiskManagerMode::OsTmpDirectory); - - runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); - Self { - builder: runtime_builder, - } - } - - fn with_disk_manager_specified(&self, paths: Vec) -> Self { - let paths = paths.iter().map(|s| s.into()).collect(); - let mut runtime_builder = self.builder.clone(); - - let mut 
disk_mgr_builder = runtime_builder - .disk_manager_builder - .clone() - .unwrap_or_default(); - disk_mgr_builder.set_mode(DiskManagerMode::Directories(paths)); - - runtime_builder = runtime_builder.with_disk_manager_builder(disk_mgr_builder); - Self { - builder: runtime_builder, - } - } - - fn with_unbounded_memory_pool(&self) -> Self { - let builder = self.builder.clone(); - let builder = builder.with_memory_pool(Arc::new(UnboundedMemoryPool::default())); - Self { builder } - } - - fn with_fair_spill_pool(&self, size: usize) -> Self { - let builder = self.builder.clone(); - let builder = builder.with_memory_pool(Arc::new(FairSpillPool::new(size))); - Self { builder } - } - - fn with_greedy_memory_pool(&self, size: usize) -> Self { - let builder = self.builder.clone(); - let builder = builder.with_memory_pool(Arc::new(GreedyMemoryPool::new(size))); - Self { builder } - } - - fn with_temp_file_path(&self, path: &str) -> Self { - let builder = self.builder.clone(); - let builder = builder.with_temp_file_path(path); - Self { builder } - } -} - -/// `PySQLOptions` allows you to specify options to the sql execution. -#[pyclass( - from_py_object, - frozen, - name = "SQLOptions", - module = "datafusion", - subclass -)] -#[derive(Clone)] -pub struct PySQLOptions { - pub options: SQLOptions, -} - -impl From for PySQLOptions { - fn from(options: SQLOptions) -> Self { - Self { options } - } -} - -#[pymethods] -impl PySQLOptions { - #[new] - fn new() -> Self { - let options = SQLOptions::new(); - Self { options } - } - - /// Should DDL data modification commands (e.g. `CREATE TABLE`) be run? Defaults to `true`. - fn with_allow_ddl(&self, allow: bool) -> Self { - Self::from(self.options.with_allow_ddl(allow)) - } - - /// Should DML data modification commands (e.g. `INSERT and COPY`) be run? Defaults to `true` - pub fn with_allow_dml(&self, allow: bool) -> Self { - Self::from(self.options.with_allow_dml(allow)) - } - - /// Should Statements such as (e.g. 
`SET VARIABLE and `BEGIN TRANSACTION` ...`) be run?. Defaults to `true` - pub fn with_allow_statements(&self, allow: bool) -> Self { - Self::from(self.options.with_allow_statements(allow)) - } -} - -/// `PySessionContext` is able to plan and execute DataFusion plans. -/// It has a powerful optimizer, a physical planner for local execution, and a -/// multi-threaded execution engine to perform the execution. -#[pyclass( - from_py_object, - frozen, - name = "SessionContext", - module = "datafusion", - subclass -)] -#[derive(Clone)] -pub struct PySessionContext { - pub ctx: Arc, - logical_codec: Arc, -} - -#[pymethods] -impl PySessionContext { - #[pyo3(signature = (config=None, runtime=None))] - #[new] - pub fn new( - config: Option, - runtime: Option, - ) -> PyDataFusionResult { - let config = if let Some(c) = config { - c.config - } else { - SessionConfig::default().with_information_schema(true) - }; - let runtime_env_builder = if let Some(c) = runtime { - c.builder - } else { - RuntimeEnvBuilder::default() - }; - let runtime = Arc::new(runtime_env_builder.build()?); - let session_state = SessionStateBuilder::new() - .with_config(config) - .with_runtime_env(runtime) - .with_default_features() - .build(); - let ctx = Arc::new(SessionContext::new_with_state(session_state)); - let logical_codec = Self::default_logical_codec(&ctx); - Ok(PySessionContext { ctx, logical_codec }) - } - - pub fn enable_url_table(&self) -> PyResult { - Ok(PySessionContext { - ctx: Arc::new(self.ctx.as_ref().clone().enable_url_table()), - logical_codec: Arc::clone(&self.logical_codec), - }) - } - - #[staticmethod] - #[pyo3(signature = ())] - pub fn global_ctx() -> PyResult { - let ctx = get_global_ctx().clone(); - let logical_codec = Self::default_logical_codec(&ctx); - Ok(Self { ctx, logical_codec }) - } - - /// Register an object store with the given name - #[pyo3(signature = (scheme, store, host=None))] - pub fn register_object_store( - &self, - scheme: &str, - store: StorageContexts, - 
host: Option<&str>, - ) -> PyResult<()> { - // for most stores the "host" is the bucket name and can be inferred from the store - let (store, upstream_host): (Arc, String) = match store { - StorageContexts::AmazonS3(s3) => (s3.inner, s3.bucket_name), - StorageContexts::GoogleCloudStorage(gcs) => (gcs.inner, gcs.bucket_name), - StorageContexts::MicrosoftAzure(azure) => (azure.inner, azure.container_name), - StorageContexts::LocalFileSystem(local) => (local.inner, "".to_string()), - StorageContexts::HTTP(http) => (http.store, http.url), - }; - - // let users override the host to match the api signature from upstream - let derived_host = if let Some(host) = host { - host - } else { - &upstream_host - }; - let url_string = format!("{scheme}{derived_host}"); - let url = Url::parse(&url_string).unwrap(); - self.ctx.runtime_env().register_object_store(&url, store); - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (name, path, table_partition_cols=vec![], - file_extension=".parquet", - schema=None, - file_sort_order=None))] - pub fn register_listing_table( - &self, - name: &str, - path: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - file_extension: &str, - schema: Option>, - file_sort_order: Option>>, - py: Python, - ) -> PyDataFusionResult<()> { - let options = ListingOptions::new(Arc::new(ParquetFormat::new())) - .with_file_extension(file_extension) - .with_table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .with_file_sort_order( - file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(), - ); - let table_path = ListingTableUrl::parse(path)?; - let resolved_schema: SchemaRef = match schema { - Some(s) => Arc::new(s.0), - None => { - let state = self.ctx.state(); - let schema = options.infer_schema(&state, &table_path); - wait_for_future(py, schema)?? 
- } - }; - let config = ListingTableConfig::new(table_path) - .with_listing_options(options) - .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?; - self.ctx.register_table(name, Arc::new(table))?; - Ok(()) - } - - pub fn register_udtf(&self, func: PyTableFunction) { - let name = func.name.clone(); - let func = Arc::new(func); - self.ctx.register_udtf(&name, func); - } - - #[pyo3(signature = (query, options=None, param_values=HashMap::default(), param_strings=HashMap::default()))] - pub fn sql_with_options( - &self, - py: Python, - mut query: String, - options: Option, - param_values: HashMap, - param_strings: HashMap, - ) -> PyDataFusionResult { - let options = if let Some(options) = options { - options.options - } else { - SQLOptions::new() - }; - - let param_values = param_values - .into_iter() - .map(|(name, value)| (name, ScalarValue::from(value))) - .collect::>(); - - let state = self.ctx.state(); - let dialect = state.config().options().sql_parser.dialect.as_ref(); - - if !param_strings.is_empty() { - query = replace_placeholders_with_strings(&query, dialect, param_strings)?; - } - - let mut df = wait_for_future(py, async { - self.ctx.sql_with_options(&query, options).await - })? 
- .map_err(from_datafusion_error)?; - - if !param_values.is_empty() { - df = df.with_param_values(param_values)?; - } - - Ok(PyDataFrame::new(df)) - } - - #[pyo3(signature = (partitions, name=None, schema=None))] - pub fn create_dataframe( - &self, - partitions: PyArrowType>>, - name: Option<&str>, - schema: Option>, - py: Python, - ) -> PyDataFusionResult { - let schema = if let Some(schema) = schema { - SchemaRef::from(schema.0) - } else { - partitions.0[0][0].schema() - }; - - let table = MemTable::try_new(schema, partitions.0)?; - - // generate a random (unique) name for this table if none is provided - // table name cannot start with numeric digit - let table_name = match name { - Some(val) => val.to_owned(), - None => { - "c".to_owned() - + Uuid::new_v4() - .simple() - .encode_lower(&mut Uuid::encode_buffer()) - } - }; - - self.ctx.register_table(&*table_name, Arc::new(table))?; - - let table = wait_for_future(py, self._table(&table_name))??; - - let df = PyDataFrame::new(table); - Ok(df) - } - - /// Create a DataFrame from an existing logical plan - pub fn create_dataframe_from_logical_plan(&self, plan: PyLogicalPlan) -> PyDataFrame { - PyDataFrame::new(DataFrame::new(self.ctx.state(), plan.plan.as_ref().clone())) - } - - /// Construct datafusion dataframe from Python list - #[pyo3(signature = (data, name=None))] - pub fn from_pylist( - &self, - data: Bound<'_, PyList>, - name: Option<&str>, - ) -> PyResult { - // Acquire GIL Token - let py = data.py(); - - // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import("pyarrow")?.getattr("Table")?; - let args = PyTuple::new(py, &[data])?; - let table = table_class.call_method1("from_pylist", args)?; - - // Convert Arrow Table to datafusion DataFrame - let df = self.from_arrow(table, name, py)?; - Ok(df) - } - - /// Construct datafusion dataframe from Python dictionary - #[pyo3(signature = (data, name=None))] - pub fn from_pydict( - &self, - data: Bound<'_, PyDict>, - name: 
Option<&str>, - ) -> PyResult { - // Acquire GIL Token - let py = data.py(); - - // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import("pyarrow")?.getattr("Table")?; - let args = PyTuple::new(py, &[data])?; - let table = table_class.call_method1("from_pydict", args)?; - - // Convert Arrow Table to datafusion DataFrame - let df = self.from_arrow(table, name, py)?; - Ok(df) - } - - /// Construct datafusion dataframe from Arrow Table - #[pyo3(signature = (data, name=None))] - pub fn from_arrow( - &self, - data: Bound<'_, PyAny>, - name: Option<&str>, - py: Python, - ) -> PyDataFusionResult { - let (schema, batches) = - if let Ok(stream_reader) = ArrowArrayStreamReader::from_pyarrow_bound(&data) { - // Works for any object that implements __arrow_c_stream__ in pycapsule. - - let schema = stream_reader.schema().as_ref().to_owned(); - let batches = stream_reader - .collect::, arrow::error::ArrowError>>()?; - - (schema, batches) - } else if let Ok(array) = RecordBatch::from_pyarrow_bound(&data) { - // While this says RecordBatch, it will work for any object that implements - // __arrow_c_array__ and returns a StructArray. 
- - (array.schema().as_ref().to_owned(), vec![array]) - } else { - return Err(PyDataFusionError::Common( - "Expected either a Arrow Array or Arrow Stream in from_arrow().".to_string(), - )); - }; - - // Because create_dataframe() expects a vector of vectors of record batches - // here we need to wrap the vector of record batches in an additional vector - let list_of_batches = PyArrowType::from(vec![batches]); - self.create_dataframe(list_of_batches, name, Some(schema.into()), py) - } - - /// Construct datafusion dataframe from pandas - #[allow(clippy::wrong_self_convention)] - #[pyo3(signature = (data, name=None))] - pub fn from_pandas(&self, data: Bound<'_, PyAny>, name: Option<&str>) -> PyResult { - // Obtain GIL token - let py = data.py(); - - // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import("pyarrow")?.getattr("Table")?; - let args = PyTuple::new(py, &[data])?; - let table = table_class.call_method1("from_pandas", args)?; - - // Convert Arrow Table to datafusion DataFrame - let df = self.from_arrow(table, name, py)?; - Ok(df) - } - - /// Construct datafusion dataframe from polars - #[pyo3(signature = (data, name=None))] - pub fn from_polars(&self, data: Bound<'_, PyAny>, name: Option<&str>) -> PyResult { - // Convert Polars dataframe to Arrow Table - let table = data.call_method0("to_arrow")?; - - // Convert Arrow Table to datafusion DataFrame - let df = self.from_arrow(table, name, data.py())?; - Ok(df) - } - - pub fn register_table(&self, name: &str, table: Bound<'_, PyAny>) -> PyDataFusionResult<()> { - let session = self.clone().into_bound_py_any(table.py())?; - let table = PyTable::new(table, Some(session))?; - - self.ctx.register_table(name, table.table)?; - Ok(()) - } - - pub fn deregister_table(&self, name: &str) -> PyDataFusionResult<()> { - self.ctx.deregister_table(name)?; - Ok(()) - } - - pub fn register_catalog_provider_list( - &self, - mut provider: Bound, - ) -> PyDataFusionResult<()> { - if 
provider.hasattr("__datafusion_catalog_provider_list__")? { - let py = provider.py(); - let codec_capsule = create_logical_extension_capsule(py, self.logical_codec.as_ref())?; - provider = provider - .getattr("__datafusion_catalog_provider_list__")? - .call1((codec_capsule,))?; - } - - let provider = if let Ok(capsule) = provider.cast::().map_err(py_datafusion_err) - { - validate_pycapsule(capsule, "datafusion_catalog_provider_list")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_catalog_provider_list")))? - .cast(); - let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc - } else { - match provider.extract::() { - Ok(py_catalog_list) => py_catalog_list.catalog_list, - Err(_) => Arc::new(RustWrappedPyCatalogProviderList::new( - provider.into(), - Arc::clone(&self.logical_codec), - )) as Arc, - } - }; - - self.ctx.register_catalog_list(provider); - - Ok(()) - } - - pub fn register_catalog_provider( - &self, - name: &str, - mut provider: Bound<'_, PyAny>, - ) -> PyDataFusionResult<()> { - if provider.hasattr("__datafusion_catalog_provider__")? { - let py = provider.py(); - let codec_capsule = create_logical_extension_capsule(py, self.logical_codec.as_ref())?; - provider = provider - .getattr("__datafusion_catalog_provider__")? - .call1((codec_capsule,))?; - } - - let provider = if let Ok(capsule) = provider.cast::().map_err(py_datafusion_err) - { - validate_pycapsule(capsule, "datafusion_catalog_provider")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_catalog_provider")))? 
- .cast(); - let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - provider as Arc - } else { - match provider.extract::() { - Ok(py_catalog) => py_catalog.catalog, - Err(_) => Arc::new(RustWrappedPyCatalogProvider::new( - provider.into(), - Arc::clone(&self.logical_codec), - )) as Arc, - } - }; - - let _ = self.ctx.register_catalog(name, provider); - - Ok(()) - } - - /// Construct datafusion dataframe from Arrow Table - pub fn register_table_provider( - &self, - name: &str, - provider: Bound<'_, PyAny>, - ) -> PyDataFusionResult<()> { - // Deprecated: use `register_table` instead - self.register_table(name, provider) - } - - pub fn register_record_batches( - &self, - name: &str, - partitions: PyArrowType>>, - ) -> PyDataFusionResult<()> { - let schema = partitions.0[0][0].schema(); - let table = MemTable::try_new(schema, partitions.0)?; - self.ctx.register_table(name, Arc::new(table))?; - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (name, path, table_partition_cols=vec![], - parquet_pruning=true, - file_extension=".parquet", - skip_metadata=true, - schema=None, - file_sort_order=None))] - pub fn register_parquet( - &self, - name: &str, - path: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - parquet_pruning: bool, - file_extension: &str, - skip_metadata: bool, - schema: Option>, - file_sort_order: Option>>, - py: Python, - ) -> PyDataFusionResult<()> { - let mut options = ParquetReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .parquet_pruning(parquet_pruning) - .skip_metadata(skip_metadata); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - options.file_sort_order = file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(); - - let result = self.ctx.register_parquet(name, path, options); - 
wait_for_future(py, result)??; - Ok(()) - } - - #[pyo3(signature = (name, - path, - options=None))] - pub fn register_csv( - &self, - name: &str, - path: &Bound<'_, PyAny>, - options: Option<&PyCsvReadOptions>, - py: Python, - ) -> PyDataFusionResult<()> { - let options = options - .map(|opts| opts.try_into()) - .transpose()? - .unwrap_or_default(); - - if path.is_instance_of::() { - let paths = path.extract::>()?; - let result = self.register_csv_from_multiple_paths(name, paths, options); - wait_for_future(py, result)??; - } else { - let path = path.extract::()?; - let result = self.ctx.register_csv(name, &path, options); - wait_for_future(py, result)??; - } - - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (name, - path, - schema=None, - schema_infer_max_records=1000, - file_extension=".json", - table_partition_cols=vec![], - file_compression_type=None))] - pub fn register_json( - &self, - name: &str, - path: PathBuf, - schema: Option>, - schema_infer_max_records: usize, - file_extension: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - file_compression_type: Option, - py: Python, - ) -> PyDataFusionResult<()> { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - - let mut options = JsonReadOptions::default() - .file_compression_type(parse_file_compression_type(file_compression_type)?) 
- .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.schema_infer_max_records = schema_infer_max_records; - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.register_json(name, path, options); - wait_for_future(py, result)??; - - Ok(()) - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (name, - path, - schema=None, - file_extension=".avro", - table_partition_cols=vec![]))] - pub fn register_avro( - &self, - name: &str, - path: PathBuf, - schema: Option>, - file_extension: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - py: Python, - ) -> PyDataFusionResult<()> { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - - let mut options = AvroReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - - let result = self.ctx.register_avro(name, path, options); - wait_for_future(py, result)??; - - Ok(()) - } - - // Registers a PyArrow.Dataset - pub fn register_dataset( - &self, - name: &str, - dataset: &Bound<'_, PyAny>, - py: Python, - ) -> PyDataFusionResult<()> { - let table: Arc = Arc::new(Dataset::new(dataset, py)?); - - self.ctx.register_table(name, table)?; - - Ok(()) - } - - pub fn register_udf(&self, udf: PyScalarUDF) -> PyResult<()> { - self.ctx.register_udf(udf.function); - Ok(()) - } - - pub fn register_udaf(&self, udaf: PyAggregateUDF) -> PyResult<()> { - self.ctx.register_udaf(udaf.function); - Ok(()) - } - - pub fn register_udwf(&self, udwf: PyWindowUDF) -> PyResult<()> { - self.ctx.register_udwf(udwf.function); - Ok(()) - } - - #[pyo3(signature = (name="datafusion"))] - pub fn catalog(&self, py: Python, name: &str) -> PyResult> { - let catalog = 
self.ctx.catalog(name).ok_or(PyKeyError::new_err(format!( - "Catalog with name {name} doesn't exist." - )))?; - - match catalog - .as_any() - .downcast_ref::() - { - Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), - None => Ok( - PyCatalog::new_from_parts(catalog, Arc::clone(&self.logical_codec)) - .into_py_any(py)?, - ), - } - } - - pub fn catalog_names(&self) -> HashSet { - self.ctx.catalog_names().into_iter().collect() - } - - pub fn tables(&self) -> HashSet { - self.ctx - .catalog_names() - .into_iter() - .filter_map(|name| self.ctx.catalog(&name)) - .flat_map(move |catalog| { - catalog - .schema_names() - .into_iter() - .filter_map(move |name| catalog.schema(&name)) - }) - .flat_map(|schema| schema.table_names()) - .collect() - } - - pub fn table(&self, name: &str, py: Python) -> PyResult { - let res = wait_for_future(py, self.ctx.table(name)) - .map_err(|e| PyKeyError::new_err(e.to_string()))?; - match res { - Ok(df) => Ok(PyDataFrame::new(df)), - Err(e) => { - if let datafusion::error::DataFusionError::Plan(msg) = &e - && msg.contains("No table named") - { - return Err(PyKeyError::new_err(msg.to_string())); - } - Err(py_datafusion_err(e)) - } - } - } - - pub fn table_exist(&self, name: &str) -> PyDataFusionResult { - Ok(self.ctx.table_exist(name)?) 
- } - - pub fn empty_table(&self) -> PyDataFusionResult { - Ok(PyDataFrame::new(self.ctx.read_empty()?)) - } - - pub fn session_id(&self) -> String { - self.ctx.session_id() - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (path, schema=None, schema_infer_max_records=1000, file_extension=".json", table_partition_cols=vec![], file_compression_type=None))] - pub fn read_json( - &self, - path: PathBuf, - schema: Option>, - schema_infer_max_records: usize, - file_extension: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - file_compression_type: Option, - py: Python, - ) -> PyDataFusionResult { - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - let mut options = JsonReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .file_compression_type(parse_file_compression_type(file_compression_type)?); - options.schema_infer_max_records = schema_infer_max_records; - options.file_extension = file_extension; - let df = if let Some(schema) = schema { - options.schema = Some(&schema.0); - let result = self.ctx.read_json(path, options); - wait_for_future(py, result)?? - } else { - let result = self.ctx.read_json(path, options); - wait_for_future(py, result)?? - }; - Ok(PyDataFrame::new(df)) - } - - #[pyo3(signature = ( - path, - options=None))] - pub fn read_csv( - &self, - path: &Bound<'_, PyAny>, - options: Option<&PyCsvReadOptions>, - py: Python, - ) -> PyDataFusionResult { - let options = options - .map(|opts| opts.try_into()) - .transpose()? 
- .unwrap_or_default(); - - if path.is_instance_of::() { - let paths = path.extract::>()?; - let paths = paths.iter().map(|p| p as &str).collect::>(); - let result = self.ctx.read_csv(paths, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); - Ok(df) - } else { - let path = path.extract::()?; - let result = self.ctx.read_csv(path, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); - Ok(df) - } - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = ( - path, - table_partition_cols=vec![], - parquet_pruning=true, - file_extension=".parquet", - skip_metadata=true, - schema=None, - file_sort_order=None))] - pub fn read_parquet( - &self, - path: &str, - table_partition_cols: Vec<(String, PyArrowType)>, - parquet_pruning: bool, - file_extension: &str, - skip_metadata: bool, - schema: Option>, - file_sort_order: Option>>, - py: Python, - ) -> PyDataFusionResult { - let mut options = ParquetReadOptions::default() - .table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - .collect::>(), - ) - .parquet_pruning(parquet_pruning) - .skip_metadata(skip_metadata); - options.file_extension = file_extension; - options.schema = schema.as_ref().map(|x| &x.0); - options.file_sort_order = file_sort_order - .unwrap_or_default() - .into_iter() - .map(|e| e.into_iter().map(|f| f.into()).collect()) - .collect(); - - let result = self.ctx.read_parquet(path, options); - let df = PyDataFrame::new(wait_for_future(py, result)??); - Ok(df) - } - - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (path, schema=None, table_partition_cols=vec![], file_extension=".avro"))] - pub fn read_avro( - &self, - path: &str, - schema: Option>, - table_partition_cols: Vec<(String, PyArrowType)>, - file_extension: &str, - py: Python, - ) -> PyDataFusionResult { - let mut options = AvroReadOptions::default().table_partition_cols( - table_partition_cols - .into_iter() - .map(|(name, ty)| (name, ty.0)) - 
.collect::>(), - ); - options.file_extension = file_extension; - let df = if let Some(schema) = schema { - options.schema = Some(&schema.0); - let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future)?? - } else { - let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future)?? - }; - Ok(PyDataFrame::new(df)) - } - - pub fn read_table(&self, table: Bound<'_, PyAny>) -> PyDataFusionResult { - let session = self.clone().into_bound_py_any(table.py())?; - let table = PyTable::new(table, Some(session))?; - let df = self.ctx.read_table(table.table())?; - Ok(PyDataFrame::new(df)) - } - - fn __repr__(&self) -> PyResult { - let config = self.ctx.copied_config(); - let mut config_entries = config - .options() - .entries() - .iter() - .filter(|e| e.value.is_some()) - .map(|e| format!("{} = {}", e.key, e.value.as_ref().unwrap())) - .collect::>(); - config_entries.sort(); - Ok(format!( - "SessionContext: id={}; configs=[\n\t{}]", - self.session_id(), - config_entries.join("\n\t") - )) - } - - /// Execute a partition of an execution plan and return a stream of record batches - pub fn execute( - &self, - plan: PyExecutionPlan, - part: usize, - py: Python, - ) -> PyDataFusionResult { - let ctx: TaskContext = TaskContext::from(&self.ctx.state()); - let plan = plan.plan.clone(); - let stream = spawn_future(py, async move { plan.execute(part, Arc::new(ctx)) })?; - Ok(PyRecordBatchStream::new(stream)) - } - - pub fn __datafusion_task_context_provider__<'py>( - &self, - py: Python<'py>, - ) -> PyResult> { - let name = cr"datafusion_task_context_provider".into(); - - let ctx_provider = Arc::clone(&self.ctx) as Arc; - let ffi_ctx_provider = FFI_TaskContextProvider::from(&ctx_provider); - - PyCapsule::new(py, ffi_ctx_provider, Some(name)) - } - - pub fn __datafusion_logical_extension_codec__<'py>( - &self, - py: Python<'py>, - ) -> PyResult> { - create_logical_extension_capsule(py, self.logical_codec.as_ref()) - } - - pub fn 
with_logical_extension_codec<'py>( - &self, - codec: Bound<'py, PyAny>, - ) -> PyDataFusionResult { - let py = codec.py(); - let logical_codec = extract_logical_extension_codec(py, Some(codec))?; - - Ok({ - Self { - ctx: Arc::clone(&self.ctx), - logical_codec, - } - }) - } -} - -impl PySessionContext { - async fn _table(&self, name: &str) -> datafusion::common::Result { - self.ctx.table(name).await - } - - async fn register_csv_from_multiple_paths( - &self, - name: &str, - table_paths: Vec, - options: CsvReadOptions<'_>, - ) -> datafusion::common::Result<()> { - let table_paths = table_paths.to_urls()?; - let session_config = self.ctx.copied_config(); - let listing_options = - options.to_listing_options(&session_config, self.ctx.copied_table_options()); - - let option_extension = listing_options.file_extension.clone(); - - if table_paths.is_empty() { - return exec_err!("No table paths were provided"); - } - - // check if the file extension matches the expected extension - for path in &table_paths { - let file_path = path.as_str(); - if !file_path.ends_with(option_extension.clone().as_str()) && !path.is_collection() { - return exec_err!( - "File path '{file_path}' does not match the expected extension '{option_extension}'" - ); - } - } - - let resolved_schema = options - .get_resolved_schema(&session_config, self.ctx.state(), table_paths[0].clone()) - .await?; - - let config = ListingTableConfig::new_with_multi_paths(table_paths) - .with_listing_options(listing_options) - .with_schema(resolved_schema); - let table = ListingTable::try_new(config)?; - self.ctx - .register_table(TableReference::Bare { table: name.into() }, Arc::new(table))?; - Ok(()) - } - - fn default_logical_codec(ctx: &Arc) -> Arc { - let codec = Arc::new(DefaultLogicalExtensionCodec {}); - let runtime = get_tokio_runtime().0.handle().clone(); - let ctx_provider = Arc::clone(ctx) as Arc; - Arc::new(FFI_LogicalExtensionCodec::new( - codec, - Some(runtime), - &ctx_provider, - )) - } -} - -pub fn 
parse_file_compression_type( - file_compression_type: Option, -) -> Result { - FileCompressionType::from_str(&*file_compression_type.unwrap_or("".to_string()).as_str()) - .map_err(|_| { - PyValueError::new_err("file_compression_type must one of: gzip, bz2, xz, zstd") - }) -} - -impl From for SessionContext { - fn from(ctx: PySessionContext) -> SessionContext { - ctx.ctx.as_ref().clone() - } -} - -impl From for PySessionContext { - fn from(ctx: SessionContext) -> PySessionContext { - let ctx = Arc::new(ctx); - let logical_codec = Self::default_logical_codec(&ctx); - - PySessionContext { ctx, logical_codec } - } -} diff --git a/src/dataframe.rs b/src/dataframe.rs deleted file mode 100644 index eb1fa4a81..000000000 --- a/src/dataframe.rs +++ /dev/null @@ -1,1472 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::ptr::NonNull; -use std::str::FromStr; -use std::sync::Arc; - -use arrow::array::{Array, ArrayRef, RecordBatch, RecordBatchReader, new_null_array}; -use arrow::compute::can_cast_types; -use arrow::error::ArrowError; -use arrow::ffi::FFI_ArrowSchema; -use arrow::ffi_stream::FFI_ArrowArrayStream; -use arrow::pyarrow::FromPyArrow; -use cstr::cstr; -use datafusion::arrow::datatypes::{Schema, SchemaRef}; -use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; -use datafusion::arrow::util::pretty; -use datafusion::catalog::TableProvider; -use datafusion::common::UnnestOptions; -use datafusion::config::{CsvOptions, ParquetColumnOptions, ParquetOptions, TableParquetOptions}; -use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; -use datafusion::error::DataFusionError; -use datafusion::execution::SendableRecordBatchStream; -use datafusion::logical_expr::SortExpr; -use datafusion::logical_expr::dml::InsertOp; -use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; -use datafusion::prelude::*; -use futures::{StreamExt, TryStreamExt}; -use parking_lot::Mutex; -use pyo3::PyErr; -use pyo3::exceptions::PyValueError; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::pybacked::PyBackedStr; -use pyo3::types::{PyCapsule, PyList, PyTuple, PyTupleMethods}; - -use crate::common::data_type::PyScalarValue; -use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err}; -use crate::expr::PyExpr; -use crate::expr::sort_expr::{PySortExpr, to_sort_expressions}; -use crate::physical_plan::PyExecutionPlan; -use crate::record_batch::{PyRecordBatchStream, poll_next_batch}; -use crate::sql::logical::PyLogicalPlan; -use crate::table::{PyTable, TempViewTable}; -use crate::utils::{is_ipython_env, spawn_future, validate_pycapsule, wait_for_future}; - -/// File-level static CStr for the Arrow array stream capsule name. 
-static ARROW_ARRAY_STREAM_NAME: &CStr = cstr!("arrow_array_stream"); - -// Type aliases to simplify very complex types used in this file and -// avoid compiler complaints about deeply nested types in struct fields. -type CachedBatches = Option<(Vec, bool)>; -type SharedCachedBatches = Arc>; - -/// Configuration for DataFrame display formatting -#[derive(Debug, Clone)] -pub struct FormatterConfig { - /// Maximum memory in bytes to use for display (default: 2MB) - pub max_bytes: usize, - /// Minimum number of rows to display (default: 10) - pub min_rows: usize, - /// Maximum number of rows to include in __repr__ output (default: 10) - pub max_rows: usize, -} - -impl Default for FormatterConfig { - fn default() -> Self { - Self { - max_bytes: 2 * 1024 * 1024, // 2MB - min_rows: 10, - max_rows: 10, - } - } -} - -impl FormatterConfig { - /// Validates that all configuration values are positive integers. - /// - /// # Returns - /// - /// `Ok(())` if all values are valid, or an `Err` with a descriptive error message. 
- pub fn validate(&self) -> Result<(), String> { - if self.max_bytes == 0 { - return Err("max_bytes must be a positive integer".to_string()); - } - - if self.min_rows == 0 { - return Err("min_rows must be a positive integer".to_string()); - } - - if self.max_rows == 0 { - return Err("max_rows must be a positive integer".to_string()); - } - - if self.min_rows > self.max_rows { - return Err("min_rows must be less than or equal to max_rows".to_string()); - } - - Ok(()) - } -} - -/// Holds the Python formatter and its configuration -struct PythonFormatter<'py> { - /// The Python formatter object - formatter: Bound<'py, PyAny>, - /// The formatter configuration - config: FormatterConfig, -} - -/// Get the Python formatter and its configuration -fn get_python_formatter_with_config(py: Python) -> PyResult { - let formatter = import_python_formatter(py)?; - let config = build_formatter_config_from_python(&formatter)?; - Ok(PythonFormatter { formatter, config }) -} - -/// Get the Python formatter from the datafusion.dataframe_formatter module -fn import_python_formatter(py: Python<'_>) -> PyResult> { - let formatter_module = py.import("datafusion.dataframe_formatter")?; - let get_formatter = formatter_module.getattr("get_formatter")?; - get_formatter.call0() -} - -// Helper function to extract attributes with fallback to default -fn get_attr<'a, T>(py_object: &'a Bound<'a, PyAny>, attr_name: &str, default_value: T) -> T -where - T: for<'py> pyo3::FromPyObject<'py, 'py> + Clone, -{ - py_object - .getattr(attr_name) - .and_then(|v| v.extract::().map_err(Into::::into)) - .unwrap_or_else(|_| default_value.clone()) -} - -/// Helper function to create a FormatterConfig from a Python formatter object -fn build_formatter_config_from_python(formatter: &Bound<'_, PyAny>) -> PyResult { - let default_config = FormatterConfig::default(); - let max_bytes = get_attr(formatter, "max_memory_bytes", default_config.max_bytes); - let min_rows = get_attr(formatter, "min_rows", 
default_config.min_rows); - - // Backward compatibility: Try max_rows first (new name), fall back to repr_rows (deprecated), - // then use default. This ensures backward compatibility with custom formatter implementations - // during the deprecation period. - let max_rows = get_attr(formatter, "max_rows", 0usize); - let max_rows = if max_rows > 0 { - // max_rows attribute exists and has a value - max_rows - } else { - // Try the deprecated repr_rows attribute - let repr_rows = get_attr(formatter, "repr_rows", 0usize); - if repr_rows > 0 { - repr_rows - } else { - // Use default - default_config.max_rows - } - }; - - let config = FormatterConfig { - max_bytes, - min_rows, - max_rows, - }; - - // Return the validated config, converting String error to PyErr - config.validate().map_err(PyValueError::new_err)?; - Ok(config) -} - -/// Python mapping of `ParquetOptions` (includes just the writer-related options). -#[pyclass( - from_py_object, - frozen, - name = "ParquetWriterOptions", - module = "datafusion", - subclass -)] -#[derive(Clone, Default)] -pub struct PyParquetWriterOptions { - options: ParquetOptions, -} - -#[pymethods] -impl PyParquetWriterOptions { - #[new] - #[allow(clippy::too_many_arguments)] - pub fn new( - data_pagesize_limit: usize, - write_batch_size: usize, - writer_version: &str, - skip_arrow_metadata: bool, - compression: Option, - dictionary_enabled: Option, - dictionary_page_size_limit: usize, - statistics_enabled: Option, - max_row_group_size: usize, - created_by: String, - column_index_truncate_length: Option, - statistics_truncate_length: Option, - data_page_row_count_limit: usize, - encoding: Option, - bloom_filter_on_write: bool, - bloom_filter_fpp: Option, - bloom_filter_ndv: Option, - allow_single_file_parallelism: bool, - maximum_parallel_row_group_writers: usize, - maximum_buffered_record_batches_per_stream: usize, - ) -> PyResult { - let writer_version = - 
datafusion::common::parquet_config::DFParquetWriterVersion::from_str(writer_version) - .map_err(py_datafusion_err)?; - Ok(Self { - options: ParquetOptions { - data_pagesize_limit, - write_batch_size, - writer_version, - skip_arrow_metadata, - compression, - dictionary_enabled, - dictionary_page_size_limit, - statistics_enabled, - max_row_group_size, - created_by, - column_index_truncate_length, - statistics_truncate_length, - data_page_row_count_limit, - encoding, - bloom_filter_on_write, - bloom_filter_fpp, - bloom_filter_ndv, - allow_single_file_parallelism, - maximum_parallel_row_group_writers, - maximum_buffered_record_batches_per_stream, - ..Default::default() - }, - }) - } -} - -/// Python mapping of `ParquetColumnOptions`. -#[pyclass( - from_py_object, - frozen, - name = "ParquetColumnOptions", - module = "datafusion", - subclass -)] -#[derive(Clone, Default)] -pub struct PyParquetColumnOptions { - options: ParquetColumnOptions, -} - -#[pymethods] -impl PyParquetColumnOptions { - #[new] - pub fn new( - bloom_filter_enabled: Option, - encoding: Option, - dictionary_enabled: Option, - compression: Option, - statistics_enabled: Option, - bloom_filter_fpp: Option, - bloom_filter_ndv: Option, - ) -> Self { - Self { - options: ParquetColumnOptions { - bloom_filter_enabled, - encoding, - dictionary_enabled, - compression, - statistics_enabled, - bloom_filter_fpp, - bloom_filter_ndv, - }, - } - } -} - -/// A PyDataFrame is a representation of a logical plan and an API to compose statements. -/// Use it to build a plan and `.collect()` to execute the plan and collect the result. -/// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. -#[pyclass( - from_py_object, - name = "DataFrame", - module = "datafusion", - subclass, - frozen -)] -#[derive(Clone)] -pub struct PyDataFrame { - df: Arc, - - // In IPython environment cache batches between __repr__ and _repr_html_ calls. 
- batches: SharedCachedBatches, -} - -impl PyDataFrame { - /// creates a new PyDataFrame - pub fn new(df: DataFrame) -> Self { - Self { - df: Arc::new(df), - batches: Arc::new(Mutex::new(None)), - } - } - - /// Return a clone of the inner Arc for crate-local callers. - pub(crate) fn inner_df(&self) -> Arc { - Arc::clone(&self.df) - } - - fn prepare_repr_string<'py>( - &self, - py: Python<'py>, - as_html: bool, - ) -> PyDataFusionResult { - // Get the Python formatter and config - let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; - - let is_ipython = *is_ipython_env(py); - - let (cached_batches, should_cache) = { - let mut cache = self.batches.lock(); - let should_cache = is_ipython && cache.is_none(); - let batches = cache.take(); - (batches, should_cache) - }; - - let (batches, has_more) = match cached_batches { - Some(b) => b, - None => wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??, - }; - - if batches.is_empty() { - // This should not be reached, but do it for safety since we index into the vector below - return Ok("No data to display".to_string()); - } - - let table_uuid = uuid::Uuid::new_v4().to_string(); - - // Convert record batches to Py list - let py_batches = batches - .iter() - .map(|rb| rb.to_pyarrow(py)) - .collect::>>>()?; - - let py_schema = self.schema().into_pyobject(py)?; - - let kwargs = pyo3::types::PyDict::new(py); - let py_batches_list = PyList::new(py, py_batches.as_slice())?; - kwargs.set_item("batches", py_batches_list)?; - kwargs.set_item("schema", py_schema)?; - kwargs.set_item("has_more", has_more)?; - kwargs.set_item("table_uuid", table_uuid)?; - - let method_name = match as_html { - true => "format_html", - false => "format_str", - }; - - let html_result = formatter.call_method(method_name, (), Some(&kwargs))?; - let html_str: String = html_result.extract()?; - - if should_cache { - let mut cache = self.batches.lock(); - *cache = 
Some((batches.clone(), has_more)); - } - - Ok(html_str) - } - - async fn collect_column_inner(&self, column: &str) -> Result { - let batches = self - .df - .as_ref() - .clone() - .select_columns(&[column])? - .collect() - .await?; - - let arrays = batches - .iter() - .map(|b| b.column(0).as_ref()) - .collect::>(); - - arrow_select::concat::concat(&arrays).map_err(Into::into) - } -} - -/// Synchronous wrapper around partitioned [`SendableRecordBatchStream`]s used -/// for the `__arrow_c_stream__` implementation. -/// -/// It drains each partition's stream sequentially, yielding record batches in -/// their original partition order. When a `projection` is set, each batch is -/// converted via `record_batch_into_schema` to apply schema changes per batch. -struct PartitionedDataFrameStreamReader { - streams: Vec, - schema: SchemaRef, - projection: Option, - current: usize, -} - -impl Iterator for PartitionedDataFrameStreamReader { - type Item = Result; - - fn next(&mut self) -> Option { - while self.current < self.streams.len() { - let stream = &mut self.streams[self.current]; - let fut = poll_next_batch(stream); - let result = Python::attach(|py| wait_for_future(py, fut)); - - match result { - Ok(Ok(Some(batch))) => { - let batch = if let Some(ref schema) = self.projection { - match record_batch_into_schema(batch, schema.as_ref()) { - Ok(b) => b, - Err(e) => return Some(Err(e)), - } - } else { - batch - }; - return Some(Ok(batch)); - } - Ok(Ok(None)) => { - self.current += 1; - continue; - } - Ok(Err(e)) => { - return Some(Err(ArrowError::ExternalError(Box::new(e)))); - } - Err(e) => { - return Some(Err(ArrowError::ExternalError(Box::new(e)))); - } - } - } - - None - } -} - -impl RecordBatchReader for PartitionedDataFrameStreamReader { - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} - -#[pymethods] -impl PyDataFrame { - /// Enable selection for `df[col]`, `df[col1, col2, col3]`, and `df[[col1, col2, col3]]` - fn __getitem__(&self, key: Bound<'_, PyAny>) 
-> PyDataFusionResult { - if let Ok(key) = key.extract::() { - // df[col] - self.select_columns(vec![key]) - } else if let Ok(tuple) = key.cast::() { - // df[col1, col2, col3] - let keys = tuple - .iter() - .map(|item| item.extract::()) - .collect::>>()?; - self.select_columns(keys) - } else if let Ok(keys) = key.extract::>() { - // df[[col1, col2, col3]] - self.select_columns(keys) - } else { - let message = "DataFrame can only be indexed by string index or indices".to_string(); - Err(PyDataFusionError::Common(message)) - } - } - - fn __repr__(&self, py: Python) -> PyDataFusionResult { - self.prepare_repr_string(py, false) - } - - #[staticmethod] - #[expect(unused_variables)] - fn default_str_repr<'py>( - batches: Vec>, - schema: &Bound<'py, PyAny>, - has_more: bool, - table_uuid: &str, - ) -> PyResult { - let batches = batches - .into_iter() - .map(|batch| RecordBatch::from_pyarrow_bound(&batch)) - .collect::>>()? - .into_iter() - .filter(|batch| batch.num_rows() > 0) - .collect::>(); - - if batches.is_empty() { - return Ok("No data to display".to_owned()); - } - - let batches_as_displ = - pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?; - - let additional_str = match has_more { - true => "\nData truncated.", - false => "", - }; - - Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) - } - - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - self.prepare_repr_string(py, true) - } - - /// Calculate summary statistics for a DataFrame - fn describe(&self, py: Python) -> PyDataFusionResult { - let df = self.df.as_ref().clone(); - let stat_df = wait_for_future(py, df.describe())??; - Ok(Self::new(stat_df)) - } - - /// Returns the schema from the logical plan - fn schema(&self) -> PyArrowType { - PyArrowType(self.df.schema().as_arrow().clone()) - } - - /// Convert this DataFrame into a Table Provider that can be used in register_table - /// By convention, into_... methods consume self and return the new object. 
- /// Disabling the clippy lint, so we can use &self - /// because we're working with Python bindings - /// where objects are shared - #[allow(clippy::wrong_self_convention)] - pub fn into_view(&self, temporary: bool) -> PyDataFusionResult { - let table_provider = if temporary { - Arc::new(TempViewTable::new(Arc::clone(&self.df))) as Arc - } else { - // Call the underlying Rust DataFrame::into_view method. - // Note that the Rust method consumes self; here we clone the inner Arc - // so that we don't invalidate this PyDataFrame. - self.df.as_ref().clone().into_view() - }; - Ok(PyTable::from(table_provider)) - } - - #[pyo3(signature = (*args))] - fn select_columns(&self, args: Vec) -> PyDataFusionResult { - let args = args.iter().map(|s| s.as_ref()).collect::>(); - let df = self.df.as_ref().clone().select_columns(&args)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (*args))] - fn select_exprs(&self, args: Vec) -> PyDataFusionResult { - let args = args.iter().map(|s| s.as_ref()).collect::>(); - let df = self.df.as_ref().clone().select_exprs(&args)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (*args))] - fn select(&self, args: Vec) -> PyDataFusionResult { - let expr: Vec = args.into_iter().map(|e| e.into()).collect(); - let df = self.df.as_ref().clone().select(expr)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (*args))] - fn drop(&self, args: Vec) -> PyDataFusionResult { - let cols = args.iter().map(|s| s.as_ref()).collect::>(); - let df = self.df.as_ref().clone().drop_columns(&cols)?; - Ok(Self::new(df)) - } - - fn filter(&self, predicate: PyExpr) -> PyDataFusionResult { - let df = self.df.as_ref().clone().filter(predicate.into())?; - Ok(Self::new(df)) - } - - fn parse_sql_expr(&self, expr: PyBackedStr) -> PyDataFusionResult { - self.df - .as_ref() - .parse_sql_expr(&expr) - .map(PyExpr::from) - .map_err(PyDataFusionError::from) - } - - fn with_column(&self, name: &str, expr: PyExpr) -> PyDataFusionResult { - let df = 
self.df.as_ref().clone().with_column(name, expr.into())?; - Ok(Self::new(df)) - } - - fn with_columns(&self, exprs: Vec) -> PyDataFusionResult { - let mut df = self.df.as_ref().clone(); - for expr in exprs { - let expr: Expr = expr.into(); - let name = format!("{}", expr.schema_name()); - df = df.with_column(name.as_str(), expr)? - } - Ok(Self::new(df)) - } - - /// Rename one column by applying a new projection. This is a no-op if the column to be - /// renamed does not exist. - fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyDataFusionResult { - let df = self - .df - .as_ref() - .clone() - .with_column_renamed(old_name, new_name)?; - Ok(Self::new(df)) - } - - fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyDataFusionResult { - let group_by = group_by.into_iter().map(|e| e.into()).collect(); - let aggs = aggs.into_iter().map(|e| e.into()).collect(); - let df = self.df.as_ref().clone().aggregate(group_by, aggs)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (*exprs))] - fn sort(&self, exprs: Vec) -> PyDataFusionResult { - let exprs = to_sort_expressions(exprs); - let df = self.df.as_ref().clone().sort(exprs)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (count, offset=0))] - fn limit(&self, count: usize, offset: usize) -> PyDataFusionResult { - let df = self.df.as_ref().clone().limit(offset, Some(count))?; - Ok(Self::new(df)) - } - - /// Executes the plan, returning a list of `RecordBatch`es. - /// Unless some order is specified in the plan, there is no - /// guarantee of the order of the result. - fn collect<'py>(&self, py: Python<'py>) -> PyResult>> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect())? - .map_err(PyDataFusionError::from)?; - // cannot use PyResult> return type due to - // https://github.com/PyO3/pyo3/issues/1813 - batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect() - } - - /// Cache DataFrame. 
- fn cache(&self, py: Python) -> PyDataFusionResult { - let df = wait_for_future(py, self.df.as_ref().clone().cache())??; - Ok(Self::new(df)) - } - - /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch - /// maintaining the input partitioning. - fn collect_partitioned<'py>(&self, py: Python<'py>) -> PyResult>>> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned())? - .map_err(PyDataFusionError::from)?; - - batches - .into_iter() - .map(|rbs| rbs.into_iter().map(|rb| rb.to_pyarrow(py)).collect()) - .collect() - } - - fn collect_column<'py>(&self, py: Python<'py>, column: &str) -> PyResult> { - wait_for_future(py, self.collect_column_inner(column))? - .map_err(PyDataFusionError::from)? - .to_data() - .to_pyarrow(py) - } - - /// Print the result, 20 lines by default - #[pyo3(signature = (num=20))] - fn show(&self, py: Python, num: usize) -> PyDataFusionResult<()> { - let df = self.df.as_ref().clone().limit(0, Some(num))?; - print_dataframe(py, df) - } - - /// Filter out duplicate rows - fn distinct(&self) -> PyDataFusionResult { - let df = self.df.as_ref().clone().distinct()?; - Ok(Self::new(df)) - } - - fn join( - &self, - right: PyDataFrame, - how: &str, - left_on: Vec, - right_on: Vec, - coalesce_keys: bool, - ) -> PyDataFusionResult { - let join_type = match how { - "inner" => JoinType::Inner, - "left" => JoinType::Left, - "right" => JoinType::Right, - "full" => JoinType::Full, - "semi" => JoinType::LeftSemi, - "anti" => JoinType::LeftAnti, - how => { - return Err(PyDataFusionError::Common(format!( - "The join type {how} does not exist or is not implemented" - ))); - } - }; - - let left_keys = left_on.iter().map(|s| s.as_ref()).collect::>(); - let right_keys = right_on.iter().map(|s| s.as_ref()).collect::>(); - - let mut df = self.df.as_ref().clone().join( - right.df.as_ref().clone(), - join_type, - &left_keys, - &right_keys, - None, - )?; - - if coalesce_keys { - let mutual_keys = left_keys - 
.iter() - .zip(right_keys.iter()) - .filter(|(l, r)| l == r) - .map(|(key, _)| *key) - .collect::>(); - - let fields_to_coalesce = mutual_keys - .iter() - .map(|name| { - let qualified_fields = df - .logical_plan() - .schema() - .qualified_fields_with_unqualified_name(name); - (*name, qualified_fields) - }) - .filter(|(_, fields)| fields.len() == 2) - .collect::>(); - - let expr: Vec = df - .logical_plan() - .schema() - .fields() - .into_iter() - .enumerate() - .map(|(idx, _)| df.logical_plan().schema().qualified_field(idx)) - .filter_map(|(qualifier, field)| { - if let Some((key_name, qualified_fields)) = fields_to_coalesce - .iter() - .find(|(_, qf)| qf.contains(&(qualifier, field))) - { - // Only add the coalesce expression once (when we encounter the first field) - // Skip the second field (it's already included in to coalesce) - if (qualifier, field) == qualified_fields[0] { - let left_col = Expr::Column(Column::from(qualified_fields[0])); - let right_col = Expr::Column(Column::from(qualified_fields[1])); - return Some(coalesce(vec![left_col, right_col]).alias(*key_name)); - } - None - } else { - Some(Expr::Column(Column::from((qualifier, field)))) - } - }) - .collect(); - df = df.select(expr)?; - } - - Ok(Self::new(df)) - } - - fn join_on( - &self, - right: PyDataFrame, - on_exprs: Vec, - how: &str, - ) -> PyDataFusionResult { - let join_type = match how { - "inner" => JoinType::Inner, - "left" => JoinType::Left, - "right" => JoinType::Right, - "full" => JoinType::Full, - "semi" => JoinType::LeftSemi, - "anti" => JoinType::LeftAnti, - how => { - return Err(PyDataFusionError::Common(format!( - "The join type {how} does not exist or is not implemented" - ))); - } - }; - let exprs: Vec = on_exprs.into_iter().map(|e| e.into()).collect(); - - let df = self - .df - .as_ref() - .clone() - .join_on(right.df.as_ref().clone(), join_type, exprs)?; - Ok(Self::new(df)) - } - - /// Print the query plan - #[pyo3(signature = (verbose=false, analyze=false))] - fn 
explain(&self, py: Python, verbose: bool, analyze: bool) -> PyDataFusionResult<()> { - let df = self.df.as_ref().clone().explain(verbose, analyze)?; - print_dataframe(py, df) - } - - /// Get the logical plan for this `DataFrame` - fn logical_plan(&self) -> PyResult { - Ok(self.df.as_ref().clone().logical_plan().clone().into()) - } - - /// Get the optimized logical plan for this `DataFrame` - fn optimized_logical_plan(&self) -> PyDataFusionResult { - Ok(self.df.as_ref().clone().into_optimized_plan()?.into()) - } - - /// Get the execution plan for this `DataFrame` - fn execution_plan(&self, py: Python) -> PyDataFusionResult { - let plan = wait_for_future(py, self.df.as_ref().clone().create_physical_plan())??; - Ok(plan.into()) - } - - /// Repartition a `DataFrame` based on a logical partitioning scheme. - fn repartition(&self, num: usize) -> PyDataFusionResult { - let new_df = self - .df - .as_ref() - .clone() - .repartition(Partitioning::RoundRobinBatch(num))?; - Ok(Self::new(new_df)) - } - - /// Repartition a `DataFrame` based on a logical partitioning scheme. - #[pyo3(signature = (*args, num))] - fn repartition_by_hash(&self, args: Vec, num: usize) -> PyDataFusionResult { - let expr = args.into_iter().map(|py_expr| py_expr.into()).collect(); - let new_df = self - .df - .as_ref() - .clone() - .repartition(Partitioning::Hash(expr, num))?; - Ok(Self::new(new_df)) - } - - /// Calculate the union of two `DataFrame`s, preserving duplicate rows.The - /// two `DataFrame`s must have exactly the same schema - #[pyo3(signature = (py_df, distinct=false))] - fn union(&self, py_df: PyDataFrame, distinct: bool) -> PyDataFusionResult { - let new_df = if distinct { - self.df - .as_ref() - .clone() - .union_distinct(py_df.df.as_ref().clone())? - } else { - self.df.as_ref().clone().union(py_df.df.as_ref().clone())? - }; - - Ok(Self::new(new_df)) - } - - /// Calculate the distinct union of two `DataFrame`s. 
The - /// two `DataFrame`s must have exactly the same schema - fn union_distinct(&self, py_df: PyDataFrame) -> PyDataFusionResult { - let new_df = self - .df - .as_ref() - .clone() - .union_distinct(py_df.df.as_ref().clone())?; - Ok(Self::new(new_df)) - } - - #[pyo3(signature = (column, preserve_nulls=true))] - fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyDataFusionResult { - // TODO: expose RecursionUnnestOptions - // REF: https://github.com/apache/datafusion/pull/11577 - let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); - let df = self - .df - .as_ref() - .clone() - .unnest_columns_with_options(&[column], unnest_options)?; - Ok(Self::new(df)) - } - - #[pyo3(signature = (columns, preserve_nulls=true))] - fn unnest_columns( - &self, - columns: Vec, - preserve_nulls: bool, - ) -> PyDataFusionResult { - // TODO: expose RecursionUnnestOptions - // REF: https://github.com/apache/datafusion/pull/11577 - let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); - let cols = columns.iter().map(|s| s.as_ref()).collect::>(); - let df = self - .df - .as_ref() - .clone() - .unnest_columns_with_options(&cols, unnest_options)?; - Ok(Self::new(df)) - } - - /// Calculate the intersection of two `DataFrame`s. The two `DataFrame`s must have exactly the same schema - fn intersect(&self, py_df: PyDataFrame) -> PyDataFusionResult { - let new_df = self - .df - .as_ref() - .clone() - .intersect(py_df.df.as_ref().clone())?; - Ok(Self::new(new_df)) - } - - /// Calculate the exception of two `DataFrame`s. The two `DataFrame`s must have exactly the same schema - fn except_all(&self, py_df: PyDataFrame) -> PyDataFusionResult { - let new_df = self.df.as_ref().clone().except(py_df.df.as_ref().clone())?; - Ok(Self::new(new_df)) - } - - /// Write a `DataFrame` to a CSV file. 
- fn write_csv( - &self, - py: Python, - path: &str, - with_header: bool, - write_options: Option, - ) -> PyDataFusionResult<()> { - let csv_options = CsvOptions { - has_header: Some(with_header), - ..Default::default() - }; - let write_options = write_options - .map(DataFrameWriteOptions::from) - .unwrap_or_default(); - - wait_for_future( - py, - self.df - .as_ref() - .clone() - .write_csv(path, write_options, Some(csv_options)), - )??; - Ok(()) - } - - /// Write a `DataFrame` to a Parquet file. - #[pyo3(signature = ( - path, - compression="zstd", - compression_level=None, - write_options=None, - ))] - fn write_parquet( - &self, - path: &str, - compression: &str, - compression_level: Option, - write_options: Option, - py: Python, - ) -> PyDataFusionResult<()> { - fn verify_compression_level(cl: Option) -> Result { - cl.ok_or(PyValueError::new_err("compression_level is not defined")) - } - - let _validated = match compression.to_lowercase().as_str() { - "snappy" => Compression::SNAPPY, - "gzip" => Compression::GZIP( - GzipLevel::try_new(compression_level.unwrap_or(6)) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "brotli" => Compression::BROTLI( - BrotliLevel::try_new(verify_compression_level(compression_level)?) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "zstd" => Compression::ZSTD( - ZstdLevel::try_new(verify_compression_level(compression_level)? 
as i32) - .map_err(|e| PyValueError::new_err(format!("{e}")))?, - ), - "lzo" => Compression::LZO, - "lz4" => Compression::LZ4, - "lz4_raw" => Compression::LZ4_RAW, - "uncompressed" => Compression::UNCOMPRESSED, - _ => { - return Err(PyDataFusionError::Common(format!( - "Unrecognized compression type {compression}" - ))); - } - }; - - let mut compression_string = compression.to_string(); - if let Some(level) = compression_level { - compression_string.push_str(&format!("({level})")); - } - - let mut options = TableParquetOptions::default(); - options.global.compression = Some(compression_string); - let write_options = write_options - .map(DataFrameWriteOptions::from) - .unwrap_or_default(); - - wait_for_future( - py, - self.df - .as_ref() - .clone() - .write_parquet(path, write_options, Option::from(options)), - )??; - Ok(()) - } - - /// Write a `DataFrame` to a Parquet file, using advanced options. - fn write_parquet_with_options( - &self, - path: &str, - options: PyParquetWriterOptions, - column_specific_options: HashMap, - write_options: Option, - py: Python, - ) -> PyDataFusionResult<()> { - let table_options = TableParquetOptions { - global: options.options, - column_specific_options: column_specific_options - .into_iter() - .map(|(k, v)| (k, v.options)) - .collect(), - ..Default::default() - }; - let write_options = write_options - .map(DataFrameWriteOptions::from) - .unwrap_or_default(); - wait_for_future( - py, - self.df.as_ref().clone().write_parquet( - path, - write_options, - Option::from(table_options), - ), - )??; - Ok(()) - } - - /// Executes a query and writes the results to a partitioned JSON file. 
- fn write_json( - &self, - path: &str, - py: Python, - write_options: Option, - ) -> PyDataFusionResult<()> { - let write_options = write_options - .map(DataFrameWriteOptions::from) - .unwrap_or_default(); - wait_for_future( - py, - self.df - .as_ref() - .clone() - .write_json(path, write_options, None), - )??; - Ok(()) - } - - fn write_table( - &self, - py: Python, - table_name: &str, - write_options: Option, - ) -> PyDataFusionResult<()> { - let write_options = write_options - .map(DataFrameWriteOptions::from) - .unwrap_or_default(); - wait_for_future( - py, - self.df - .as_ref() - .clone() - .write_table(table_name, write_options), - )??; - Ok(()) - } - - /// Convert to Arrow Table - /// Collect the batches and pass to Arrow Table - fn to_arrow_table(&self, py: Python<'_>) -> PyResult> { - let batches = self.collect(py)?.into_pyobject(py)?; - - // only use the DataFrame's schema if there are no batches, otherwise let the schema be - // determined from the batches (avoids some inconsistencies with nullable columns) - let args = if batches.len()? == 0 { - let schema = self.schema().into_pyobject(py)?; - PyTuple::new(py, &[batches, schema])? - } else { - PyTuple::new(py, &[batches])? 
- }; - - // Instantiate pyarrow Table object and use its from_batches method - let table_class = py.import("pyarrow")?.getattr("Table")?; - let table: Py = table_class.call_method1("from_batches", args)?.into(); - Ok(table) - } - - #[pyo3(signature = (requested_schema=None))] - fn __arrow_c_stream__<'py>( - &'py self, - py: Python<'py>, - requested_schema: Option>, - ) -> PyDataFusionResult> { - let df = self.df.as_ref().clone(); - let streams = spawn_future(py, async move { df.execute_stream_partitioned().await })?; - - let mut schema: Schema = self.df.schema().to_owned().as_arrow().clone(); - let mut projection: Option = None; - - if let Some(schema_capsule) = requested_schema { - validate_pycapsule(&schema_capsule, "arrow_schema")?; - - let data: NonNull = schema_capsule - .pointer_checked(Some(c_str!("arrow_schema")))? - .cast(); - let schema_ptr = unsafe { data.as_ref() }; - let desired_schema = Schema::try_from(schema_ptr)?; - - schema = project_schema(schema, desired_schema)?; - projection = Some(Arc::new(schema.clone())); - } - - let schema_ref = Arc::new(schema.clone()); - - let reader = PartitionedDataFrameStreamReader { - streams, - schema: schema_ref, - projection, - current: 0, - }; - let reader: Box = Box::new(reader); - - // Create the Arrow stream and wrap it in a PyCapsule. The default - // destructor provided by PyO3 will drop the stream unless ownership is - // transferred to PyArrow during import. 
- let stream = FFI_ArrowArrayStream::new(reader); - let name = CString::new(ARROW_ARRAY_STREAM_NAME.to_bytes()).unwrap(); - let capsule = PyCapsule::new(py, stream, Some(name))?; - Ok(capsule) - } - - fn execute_stream(&self, py: Python) -> PyDataFusionResult { - let df = self.df.as_ref().clone(); - let stream = spawn_future(py, async move { df.execute_stream().await })?; - Ok(PyRecordBatchStream::new(stream)) - } - - fn execute_stream_partitioned(&self, py: Python) -> PyResult> { - let df = self.df.as_ref().clone(); - let streams = spawn_future(py, async move { df.execute_stream_partitioned().await })?; - Ok(streams.into_iter().map(PyRecordBatchStream::new).collect()) - } - - /// Convert to pandas dataframe with pyarrow - /// Collect the batches, pass to Arrow Table & then convert to Pandas DataFrame - fn to_pandas(&self, py: Python<'_>) -> PyResult> { - let table = self.to_arrow_table(py)?; - - // See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas - let result = table.call_method0(py, "to_pandas")?; - Ok(result) - } - - /// Convert to Python list using pyarrow - /// Each list item represents one row encoded as dictionary - fn to_pylist(&self, py: Python<'_>) -> PyResult> { - let table = self.to_arrow_table(py)?; - - // See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pylist - let result = table.call_method0(py, "to_pylist")?; - Ok(result) - } - - /// Convert to Python dictionary using pyarrow - /// Each dictionary key is a column and the dictionary value represents the column values - fn to_pydict(&self, py: Python) -> PyResult> { - let table = self.to_arrow_table(py)?; - - // See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pydict - let result = table.call_method0(py, "to_pydict")?; - Ok(result) - } - - /// Convert to polars dataframe with pyarrow - /// Collect the batches, pass to Arrow Table & then convert to polars DataFrame 
- fn to_polars(&self, py: Python<'_>) -> PyResult> { - let table = self.to_arrow_table(py)?; - let dataframe = py.import("polars")?.getattr("DataFrame")?; - let args = PyTuple::new(py, &[table])?; - let result: Py = dataframe.call1(args)?.into(); - Ok(result) - } - - // Executes this DataFrame to get the total number of rows. - fn count(&self, py: Python) -> PyDataFusionResult { - Ok(wait_for_future(py, self.df.as_ref().clone().count())??) - } - - /// Fill null values with a specified value for specific columns - #[pyo3(signature = (value, columns=None))] - fn fill_null( - &self, - value: Py, - columns: Option>, - py: Python, - ) -> PyDataFusionResult { - let scalar_value: PyScalarValue = value.extract(py)?; - - let cols = match columns { - Some(col_names) => col_names.iter().map(|c| c.to_string()).collect(), - None => Vec::new(), // Empty vector means fill null for all columns - }; - - let df = self.df.as_ref().clone().fill_null(scalar_value.0, cols)?; - Ok(Self::new(df)) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "InsertOp", - module = "datafusion" -)] -pub enum PyInsertOp { - APPEND, - REPLACE, - OVERWRITE, -} - -impl From for InsertOp { - fn from(value: PyInsertOp) -> Self { - match value { - PyInsertOp::APPEND => InsertOp::Append, - PyInsertOp::REPLACE => InsertOp::Replace, - PyInsertOp::OVERWRITE => InsertOp::Overwrite, - } - } -} - -#[derive(Debug, Clone)] -#[pyclass( - from_py_object, - frozen, - name = "DataFrameWriteOptions", - module = "datafusion" -)] -pub struct PyDataFrameWriteOptions { - insert_operation: InsertOp, - single_file_output: bool, - partition_by: Vec, - sort_by: Vec, -} - -impl From for DataFrameWriteOptions { - fn from(value: PyDataFrameWriteOptions) -> Self { - DataFrameWriteOptions::new() - .with_insert_operation(value.insert_operation) - .with_single_file_output(value.single_file_output) - .with_partition_by(value.partition_by) - 
.with_sort_by(value.sort_by) - } -} - -#[pymethods] -impl PyDataFrameWriteOptions { - #[new] - fn new( - insert_operation: Option, - single_file_output: bool, - partition_by: Option>, - sort_by: Option>, - ) -> Self { - let insert_operation = insert_operation.map(Into::into).unwrap_or(InsertOp::Append); - let sort_by = sort_by - .unwrap_or_default() - .into_iter() - .map(Into::into) - .collect(); - Self { - insert_operation, - single_file_output, - partition_by: partition_by.unwrap_or_default(), - sort_by, - } - } -} - -/// Print DataFrame -fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { - // Get string representation of record batches - let batches = wait_for_future(py, df.collect())??; - let result = if batches.is_empty() { - "DataFrame has no rows".to_string() - } else { - match pretty::pretty_format_batches(&batches) { - Ok(batch) => format!("DataFrame()\n{batch}"), - Err(err) => format!("Error: {:?}", err.to_string()), - } - }; - - // Import the Python 'builtins' module to access the print function - // Note that println! does not print to the Python debug console and is not visible in notebooks for instance - let print = py.import("builtins")?.getattr("print")?; - print.call1((result,))?; - Ok(()) -} - -fn project_schema(from_schema: Schema, to_schema: Schema) -> Result { - let merged_schema = Schema::try_merge(vec![from_schema, to_schema.clone()])?; - - let project_indices: Vec = to_schema - .fields - .iter() - .map(|field| field.name()) - .filter_map(|field_name| merged_schema.index_of(field_name).ok()) - .collect(); - - merged_schema.project(&project_indices) -} -// NOTE: `arrow::compute::cast` in combination with `RecordBatch::try_select` or -// DataFusion's `schema::cast_record_batch` do not fully cover the required -// transformations here. They will not create missing columns and may insert -// nulls for non-nullable fields without erroring. To maintain current behavior -// we perform the casting and null checks manually. 
-fn record_batch_into_schema( - record_batch: RecordBatch, - schema: &Schema, -) -> Result { - let schema = Arc::new(schema.clone()); - let base_schema = record_batch.schema(); - if base_schema.fields().is_empty() { - // Nothing to project - return Ok(RecordBatch::new_empty(schema)); - } - - let array_size = record_batch.column(0).len(); - let mut data_arrays = Vec::with_capacity(schema.fields().len()); - - for field in schema.fields() { - let desired_data_type = field.data_type(); - if let Some(original_data) = record_batch.column_by_name(field.name()) { - let original_data_type = original_data.data_type(); - - if can_cast_types(original_data_type, desired_data_type) { - data_arrays.push(arrow::compute::kernels::cast( - original_data, - desired_data_type, - )?); - } else if field.is_nullable() { - data_arrays.push(new_null_array(desired_data_type, array_size)); - } else { - return Err(ArrowError::CastError(format!( - "Attempting to cast to non-nullable and non-castable field {} during schema projection.", - field.name() - ))); - } - } else { - if !field.is_nullable() { - return Err(ArrowError::CastError(format!( - "Attempting to set null to non-nullable field {} during schema projection.", - field.name() - ))); - } - data_arrays.push(new_null_array(desired_data_type, array_size)); - } - } - - RecordBatch::try_new(schema, data_arrays) -} - -/// This is a helper function to return the first non-empty record batch from executing a DataFrame. -/// It additionally returns a bool, which indicates if there are more record batches available. -/// We do this so we can determine if we should indicate to the user that the data has been -/// truncated. This collects until we have archived both of these two conditions -/// -/// - We have collected our minimum number of rows -/// - We have reached our limit, either data size or maximum number of rows -/// -/// Otherwise it will return when the stream has exhausted. 
If you want a specific number of -/// rows, set min_rows == max_rows. -async fn collect_record_batches_to_display( - df: DataFrame, - config: FormatterConfig, -) -> Result<(Vec, bool), DataFusionError> { - let FormatterConfig { - max_bytes, - min_rows, - max_rows, - } = config; - - let partitioned_stream = df.execute_stream_partitioned().await?; - let mut stream = futures::stream::iter(partitioned_stream).flatten(); - let mut size_estimate_so_far = 0; - let mut rows_so_far = 0; - let mut record_batches = Vec::default(); - let mut has_more = false; - - // Collect rows until we hit a limit (memory or max_rows) OR reach the guaranteed minimum. - // The minimum rows constraint overrides both memory and row limits to ensure a baseline - // of data is always displayed, even if it temporarily exceeds those limits. - // This provides better UX by guaranteeing users see at least min_rows rows. - while (size_estimate_so_far < max_bytes && rows_so_far < max_rows) || rows_so_far < min_rows { - let mut rb = match stream.next().await { - None => { - break; - } - Some(Ok(r)) => r, - Some(Err(e)) => return Err(e), - }; - - let mut rows_in_rb = rb.num_rows(); - if rows_in_rb > 0 { - size_estimate_so_far += rb.get_array_memory_size(); - - // When memory limit is exceeded, scale back row count proportionally to stay within budget - if size_estimate_so_far > max_bytes { - let ratio = max_bytes as f32 / size_estimate_so_far as f32; - let total_rows = rows_in_rb + rows_so_far; - - // Calculate reduced rows maintaining the memory/data proportion - let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; - // Ensure we always respect the minimum rows guarantee - if reduced_row_num < min_rows { - reduced_row_num = min_rows.min(total_rows); - } - - let limited_rows_this_rb = reduced_row_num - rows_so_far; - if limited_rows_this_rb < rows_in_rb { - rows_in_rb = limited_rows_this_rb; - rb = rb.slice(0, limited_rows_this_rb); - has_more = true; - } - } - - if rows_in_rb + 
rows_so_far > max_rows { - rb = rb.slice(0, max_rows - rows_so_far); - has_more = true; - } - - rows_so_far += rb.num_rows(); - record_batches.push(rb); - } - } - - if record_batches.is_empty() { - return Ok((Vec::default(), false)); - } - - if !has_more { - // Data was not already truncated, so check to see if more record batches remain - has_more = match stream.try_next().await { - Ok(None) => false, // reached end - Ok(Some(_)) => true, - Err(_) => false, // Stream disconnected - }; - } - - Ok((record_batches, has_more)) -} diff --git a/src/dataset.rs b/src/dataset.rs deleted file mode 100644 index dbeafcd9f..000000000 --- a/src/dataset.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::sync::Arc; - -use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::pyarrow::PyArrowType; -use datafusion::catalog::Session; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::error::{DataFusionError, Result as DFResult}; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_plan::ExecutionPlan; -use pyo3::exceptions::PyValueError; -/// Implements a Datafusion TableProvider that delegates to a PyArrow Dataset -/// This allows us to use PyArrow Datasets as Datafusion tables while pushing down projections and filters -use pyo3::prelude::*; -use pyo3::types::PyType; - -use crate::dataset_exec::DatasetExec; -use crate::pyarrow_filter_expression::PyArrowFilterExpression; - -// Wraps a pyarrow.dataset.Dataset class and implements a Datafusion TableProvider around it -#[derive(Debug)] -pub(crate) struct Dataset { - dataset: Py, -} - -impl Dataset { - // Creates a Python PyArrow.Dataset - pub fn new(dataset: &Bound<'_, PyAny>, py: Python) -> PyResult { - // Ensure that we were passed an instance of pyarrow.dataset.Dataset - let ds = PyModule::import(py, "pyarrow.dataset")?; - let ds_attr = ds.getattr("Dataset")?; - let ds_type = ds_attr.cast::()?; - if dataset.is_instance(ds_type)? { - Ok(Dataset { - dataset: dataset.clone().unbind(), - }) - } else { - Err(PyValueError::new_err( - "dataset argument must be a pyarrow.dataset.Dataset object", - )) - } - } -} - -#[async_trait] -impl TableProvider for Dataset { - /// Returns the table provider as [`Any`](std::any::Any) so that it can be - /// downcast to a specific implementation. 
- fn as_any(&self) -> &dyn Any { - self - } - - /// Get a reference to the schema for this table - fn schema(&self) -> SchemaRef { - Python::attach(|py| { - let dataset = self.dataset.bind(py); - // This can panic but since we checked that self.dataset is a pyarrow.dataset.Dataset it should never - Arc::new( - dataset - .getattr("schema") - .unwrap() - .extract::>() - .unwrap() - .0, - ) - }) - } - - /// Get the type of this table for metadata/catalog purposes. - fn table_type(&self) -> TableType { - TableType::Base - } - - /// Create an ExecutionPlan that will scan the table. - /// The table provider will be usually responsible of grouping - /// the source data into partitions that can be efficiently - /// parallelized or distributed. - async fn scan( - &self, - _ctx: &dyn Session, - projection: Option<&Vec>, - filters: &[Expr], - // limit can be used to reduce the amount scanned - // from the datasource as a performance optimization. - // If set, it contains the amount of rows needed by the `LogicalPlan`, - // The datasource should return *at least* this number of rows if available. - _limit: Option, - ) -> DFResult> { - Python::attach(|py| { - let plan: Arc = Arc::new( - DatasetExec::new(py, self.dataset.bind(py), projection.cloned(), filters) - .map_err(|err| DataFusionError::External(Box::new(err)))?, - ); - Ok(plan) - }) - } - - /// Tests whether the table provider can make use of a filter expression - /// to optimise data retrieval. 
- fn supports_filters_pushdown( - &self, - filter: &[&Expr], - ) -> DFResult> { - filter - .iter() - .map(|&f| match PyArrowFilterExpression::try_from(f) { - Ok(_) => Ok(TableProviderFilterPushDown::Exact), - _ => Ok(TableProviderFilterPushDown::Unsupported), - }) - .collect() - } -} diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs deleted file mode 100644 index e3c058c07..000000000 --- a/src/dataset_exec.rs +++ /dev/null @@ -1,305 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::sync::Arc; - -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::error::{ArrowError, Result as ArrowResult}; -use datafusion::arrow::pyarrow::PyArrowType; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::error::{DataFusionError as InnerDataFusionError, Result as DFResult}; -use datafusion::execution::context::TaskContext; -use datafusion::logical_expr::Expr; -use datafusion::logical_expr::utils::conjunction; -use datafusion::physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, -}; -use futures::{TryStreamExt, stream}; -/// Implements a Datafusion physical ExecutionPlan that delegates to a PyArrow Dataset -/// This actually performs the projection, filtering and scanning of a Dataset -use pyo3::prelude::*; -use pyo3::types::{PyDict, PyIterator, PyList}; - -use crate::errors::PyDataFusionResult; -use crate::pyarrow_filter_expression::PyArrowFilterExpression; - -struct PyArrowBatchesAdapter { - batches: Py, -} - -impl Iterator for PyArrowBatchesAdapter { - type Item = ArrowResult; - - fn next(&mut self) -> Option { - Python::attach(|py| { - let mut batches = self.batches.clone_ref(py).into_bound(py); - Some( - batches - .next()? 
- .and_then(|batch| Ok(batch.extract::>()?.0)) - .map_err(|err| ArrowError::ExternalError(Box::new(err))), - ) - }) - } -} - -// Wraps a pyarrow.dataset.Dataset class and implements a Datafusion ExecutionPlan around it -#[derive(Debug)] -pub(crate) struct DatasetExec { - dataset: Py, - schema: SchemaRef, - fragments: Py, - columns: Option>, - filter_expr: Option>, - projected_statistics: Statistics, - plan_properties: Arc, -} - -impl DatasetExec { - pub fn new( - py: Python, - dataset: &Bound<'_, PyAny>, - projection: Option>, - filters: &[Expr], - ) -> PyDataFusionResult { - let columns: Option>> = projection.map(|p| { - p.iter() - .map(|index| { - let name: String = dataset - .getattr("schema")? - .call_method1("field", (*index,))? - .getattr("name")? - .extract()?; - Ok(name) - }) - .collect() - }); - let columns: Option> = columns.transpose()?; - let filter_expr: Option> = conjunction(filters.to_owned()) - .map(|filters| { - PyArrowFilterExpression::try_from(&filters) - .map(|filter_expr| filter_expr.inner().clone_ref(py)) - }) - .transpose()?; - - let kwargs = PyDict::new(py); - - kwargs.set_item("columns", columns.clone())?; - kwargs.set_item( - "filter", - filter_expr.as_ref().map(|expr| expr.clone_ref(py)), - )?; - - let scanner = dataset.call_method("scanner", (), Some(&kwargs))?; - - let schema = Arc::new( - scanner - .getattr("projected_schema")? - .extract::>()? 
- .0, - ); - - let builtins = Python::import(py, "builtins")?; - let pylist = builtins.getattr("list")?; - - // Get the fragments or partitions of the dataset - let fragments_iterator: Bound<'_, PyAny> = dataset.call_method1( - "get_fragments", - (filter_expr.as_ref().map(|expr| expr.clone_ref(py)),), - )?; - - let fragments_iter = pylist.call1((fragments_iterator,))?; - let fragments = fragments_iter.cast::().map_err(PyErr::from)?; - - let projected_statistics = Statistics::new_unknown(&schema); - let plan_properties = Arc::new(PlanProperties::new( - EquivalenceProperties::new(schema.clone()), - Partitioning::UnknownPartitioning(fragments.len()), - EmissionType::Final, - Boundedness::Bounded, - )); - - Ok(DatasetExec { - dataset: dataset.clone().unbind(), - schema, - fragments: fragments.clone().unbind(), - columns, - filter_expr, - projected_statistics, - plan_properties, - }) - } -} - -impl ExecutionPlan for DatasetExec { - fn name(&self) -> &str { - // [ExecutionPlan::name] docs recommends forwarding to `static_name` - Self::static_name() - } - - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - /// Get the schema for this execution plan - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - fn children(&self) -> Vec<&Arc> { - // this is a leaf node and has no children - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> DFResult> { - Ok(self) - } - - fn execute( - &self, - partition: usize, - context: Arc, - ) -> DFResult { - let batch_size = context.session_config().batch_size(); - Python::attach(|py| { - let dataset = self.dataset.bind(py); - let fragments = self.fragments.bind(py); - let fragment = fragments - .get_item(partition) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - - // We need to pass the dataset schema to unify the fragment and dataset schema per PyArrow docs - let dataset_schema = dataset - .getattr("schema") - .map_err(|err| 
InnerDataFusionError::External(Box::new(err)))?; - let kwargs = PyDict::new(py); - kwargs - .set_item("columns", self.columns.clone()) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - kwargs - .set_item( - "filter", - self.filter_expr.as_ref().map(|expr| expr.clone_ref(py)), - ) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - kwargs - .set_item("batch_size", batch_size) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - let scanner = fragment - .call_method("scanner", (dataset_schema,), Some(&kwargs)) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - let schema: SchemaRef = Arc::new( - scanner - .getattr("projected_schema") - .and_then(|schema| Ok(schema.extract::>()?.0)) - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?, - ); - let record_batches: Bound<'_, PyIterator> = scanner - .call_method0("to_batches") - .map_err(|err| InnerDataFusionError::External(Box::new(err)))? - .try_iter() - .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - - let record_batches = PyArrowBatchesAdapter { - batches: record_batches.into(), - }; - - let record_batch_stream = stream::iter(record_batches); - let record_batch_stream: SendableRecordBatchStream = Box::pin( - RecordBatchStreamAdapter::new(schema, record_batch_stream.map_err(|e| e.into())), - ); - Ok(record_batch_stream) - }) - } - - fn partition_statistics(&self, _partition: Option) -> DFResult { - Ok(self.projected_statistics.clone()) - } - - fn properties(&self) -> &Arc { - &self.plan_properties - } -} - -impl ExecutionPlanProperties for DatasetExec { - /// Get the output partitioning of this plan - fn output_partitioning(&self) -> &Partitioning { - self.plan_properties.output_partitioning() - } - - fn output_ordering(&self) -> Option<&LexOrdering> { - None - } - - fn boundedness(&self) -> Boundedness { - self.plan_properties.boundedness - } - - fn pipeline_behavior(&self) -> EmissionType { - 
self.plan_properties.emission_type - } - - fn equivalence_properties(&self) -> &datafusion::physical_expr::EquivalenceProperties { - &self.plan_properties.eq_properties - } -} - -impl DisplayAs for DatasetExec { - fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { - Python::attach(|py| { - let number_of_fragments = self.fragments.bind(py).len(); - match t { - DisplayFormatType::Default - | DisplayFormatType::Verbose - | DisplayFormatType::TreeRender => { - let projected_columns: Vec = self - .schema - .fields() - .iter() - .map(|x| x.name().to_owned()) - .collect(); - if let Some(filter_expr) = &self.filter_expr { - let filter_expr = filter_expr.bind(py).str().or(Err(std::fmt::Error))?; - write!( - f, - "DatasetExec: number_of_fragments={}, filter_expr={}, projection=[{}]", - number_of_fragments, - filter_expr, - projected_columns.join(", "), - ) - } else { - write!( - f, - "DatasetExec: number_of_fragments={}, projection=[{}]", - number_of_fragments, - projected_columns.join(", "), - ) - } - } - } - }) - } -} diff --git a/src/errors.rs b/src/errors.rs deleted file mode 100644 index 0d25c8847..000000000 --- a/src/errors.rs +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use core::fmt; -use std::error::Error; -use std::fmt::Debug; - -use datafusion::arrow::error::ArrowError; -use datafusion::error::DataFusionError as InnerDataFusionError; -use prost::EncodeError; -use pyo3::PyErr; -use pyo3::exceptions::{PyException, PyValueError}; - -pub type PyDataFusionResult = std::result::Result; - -#[derive(Debug)] -pub enum PyDataFusionError { - ExecutionError(Box), - ArrowError(ArrowError), - Common(String), - PythonError(PyErr), - EncodeError(EncodeError), -} - -impl fmt::Display for PyDataFusionError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - PyDataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e}"), - PyDataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), - PyDataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), - PyDataFusionError::Common(e) => write!(f, "{e}"), - PyDataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), - } - } -} - -impl From for PyDataFusionError { - fn from(err: ArrowError) -> PyDataFusionError { - PyDataFusionError::ArrowError(err) - } -} - -impl From for PyDataFusionError { - fn from(err: InnerDataFusionError) -> PyDataFusionError { - PyDataFusionError::ExecutionError(Box::new(err)) - } -} - -impl From for PyDataFusionError { - fn from(err: PyErr) -> PyDataFusionError { - PyDataFusionError::PythonError(err) - } -} - -impl From for PyErr { - fn from(err: PyDataFusionError) -> PyErr { - match err { - PyDataFusionError::PythonError(py_err) => py_err, - _ => PyException::new_err(err.to_string()), - } - } -} - -impl Error for PyDataFusionError {} - -pub fn py_type_err(e: impl Debug) -> PyErr { - PyErr::new::(format!("{e:?}")) -} - -pub fn py_runtime_err(e: impl Debug) -> PyErr { - PyErr::new::(format!("{e:?}")) -} - -pub fn py_datafusion_err(e: impl Debug) -> PyErr { - PyErr::new::(format!("{e:?}")) -} - 
-pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr { - PyErr::new::(format!("{e:?}")) -} - -pub fn to_datafusion_err(e: impl Debug) -> InnerDataFusionError { - InnerDataFusionError::Execution(format!("{e:?}")) -} - -pub fn from_datafusion_error(err: InnerDataFusionError) -> PyErr { - match err { - InnerDataFusionError::External(boxed) => match boxed.downcast::() { - Ok(py_err) => *py_err, - Err(original_boxed) => PyValueError::new_err(format!("{original_boxed}")), - }, - _ => PyValueError::new_err(format!("{err}")), - } -} diff --git a/src/expr.rs b/src/expr.rs deleted file mode 100644 index c4f2a12da..000000000 --- a/src/expr.rs +++ /dev/null @@ -1,884 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; -use std::convert::{From, Into}; -use std::sync::Arc; - -use datafusion::arrow::datatypes::{DataType, Field}; -use datafusion::arrow::pyarrow::PyArrowType; -use datafusion::functions::core::expr_ext::FieldAccessor; -use datafusion::logical_expr::expr::{ - AggregateFunction, AggregateFunctionParams, FieldMetadata, InList, InSubquery, ScalarFunction, - SetComparison, WindowFunction, -}; -use datafusion::logical_expr::utils::exprlist_to_fields; -use datafusion::logical_expr::{ - Between, BinaryExpr, Case, Cast, Expr, ExprFuncBuilder, ExprFunctionExt, Like, LogicalPlan, - Operator, TryCast, WindowFunctionDefinition, col, lit, lit_with_metadata, -}; -use pyo3::IntoPyObjectExt; -use pyo3::basic::CompareOp; -use pyo3::prelude::*; -use window::PyWindowFrame; - -use self::alias::PyAlias; -use self::bool_expr::{ - PyIsFalse, PyIsNotFalse, PyIsNotNull, PyIsNotTrue, PyIsNotUnknown, PyIsNull, PyIsTrue, - PyIsUnknown, PyNegative, PyNot, -}; -use self::like::{PyILike, PyLike, PySimilarTo}; -use self::scalar_variable::PyScalarVariable; -use crate::common::data_type::{DataTypeMap, NullTreatment, PyScalarValue, RexType}; -use crate::errors::{PyDataFusionResult, py_runtime_err, py_type_err, py_unsupported_variant_err}; -use crate::expr::aggregate_expr::PyAggregateFunction; -use crate::expr::binary_expr::PyBinaryExpr; -use crate::expr::column::PyColumn; -use crate::expr::literal::PyLiteral; -use crate::functions::add_builder_fns_to_window; -use crate::pyarrow_util::scalar_to_pyarrow; -use crate::sql::logical::PyLogicalPlan; - -pub mod aggregate; -pub mod aggregate_expr; -pub mod alias; -pub mod analyze; -pub mod between; -pub mod binary_expr; -pub mod bool_expr; -pub mod case; -pub mod cast; -pub mod column; -pub mod conditional_expr; -pub mod copy_to; -pub mod create_catalog; -pub mod create_catalog_schema; -pub mod create_external_table; -pub mod create_function; -pub mod create_index; -pub mod create_memory_table; -pub mod create_view; -pub mod 
describe_table; -pub mod distinct; -pub mod dml; -pub mod drop_catalog_schema; -pub mod drop_function; -pub mod drop_table; -pub mod drop_view; -pub mod empty_relation; -pub mod exists; -pub mod explain; -pub mod extension; -pub mod filter; -pub mod grouping_set; -pub mod in_list; -pub mod in_subquery; -pub mod join; -pub mod like; -pub mod limit; -pub mod literal; -pub mod logical_node; -pub mod placeholder; -pub mod projection; -pub mod recursive_query; -pub mod repartition; -pub mod scalar_subquery; -pub mod scalar_variable; -pub mod set_comparison; -pub mod signature; -pub mod sort; -pub mod sort_expr; -pub mod statement; -pub mod subquery; -pub mod subquery_alias; -pub mod table_scan; -pub mod union; -pub mod unnest; -pub mod unnest_expr; -pub mod values; -pub mod window; - -use sort_expr::{PySortExpr, to_sort_expressions}; - -/// A PyExpr that can be used on a DataFrame -#[pyclass( - from_py_object, - frozen, - name = "RawExpr", - module = "datafusion.expr", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyExpr { - pub expr: Expr, -} - -impl From for Expr { - fn from(expr: PyExpr) -> Expr { - expr.expr - } -} - -impl From for PyExpr { - fn from(expr: Expr) -> PyExpr { - PyExpr { expr } - } -} - -/// Convert a list of DataFusion Expr to PyExpr -pub fn py_expr_list(expr: &[Expr]) -> PyResult> { - Ok(expr.iter().map(|e| PyExpr::from(e.clone())).collect()) -} - -#[pymethods] -impl PyExpr { - /// Return the specific expression - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - Python::attach(|_| match &self.expr { - Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?), - Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?), - Expr::ScalarVariable(field, variables) => { - Ok(PyScalarVariable::new(field, variables).into_bound_py_any(py)?) 
- } - Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), - Expr::Literal(value, metadata) => Ok(PyLiteral::new_with_metadata( - value.clone(), - metadata.clone(), - ) - .into_bound_py_any(py)?), - Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?), - Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_bound_py_any(py)?), - Expr::IsNotUnknown(expr) => { - Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?) - } - Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_bound_py_any(py)?), - Expr::AggregateFunction(expr) => { - Ok(PyAggregateFunction::from(expr.clone()).into_bound_py_any(py)?) - } - Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_bound_py_any(py)?), - Expr::Between(value) => { - Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?) 
- } - Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_bound_py_any(py)?), - Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), - Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), - Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::ScalarFunction to a Python object is not implemented: {value:?}" - ))), - Expr::WindowFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::WindowFunction to a Python object is not implemented: {value:?}" - ))), - Expr::InList(value) => { - Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?) - } - Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), - Expr::InSubquery(value) => { - Ok(in_subquery::PyInSubquery::from(value.clone()).into_bound_py_any(py)?) - } - Expr::ScalarSubquery(value) => { - Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) - } - #[allow(deprecated)] - Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( - "Converting Expr::Wildcard to a Python object is not implemented : {qualifier:?} {options:?}" - ))), - Expr::GroupingSet(value) => { - Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) - } - Expr::Placeholder(value) => { - Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) - } - Expr::OuterReferenceColumn(data_type, column) => { - Err(py_unsupported_variant_err(format!( - "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {data_type:?} - {column:?}" - ))) - } - Expr::Unnest(value) => { - Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?) - } - Expr::SetComparison(value) => { - Ok(set_comparison::PySetComparison::from(value.clone()).into_bound_py_any(py)?) - } - }) - } - - /// Returns the name of this expression as it should appear in a schema. 
This name - /// will not include any CAST expressions. - fn schema_name(&self) -> PyResult { - Ok(format!("{}", self.expr.schema_name())) - } - - /// Returns a full and complete string representation of this expression. - fn canonical_name(&self) -> PyResult { - Ok(format!("{}", self.expr)) - } - - /// Returns the name of the Expr variant. - /// Ex: 'IsNotNull', 'Literal', 'BinaryExpr', etc - fn variant_name(&self) -> PyResult<&str> { - Ok(self.expr.variant_name()) - } - - fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { - let expr = match op { - CompareOp::Lt => self.expr.clone().lt(other.expr), - CompareOp::Le => self.expr.clone().lt_eq(other.expr), - CompareOp::Eq => self.expr.clone().eq(other.expr), - CompareOp::Ne => self.expr.clone().not_eq(other.expr), - CompareOp::Gt => self.expr.clone().gt(other.expr), - CompareOp::Ge => self.expr.clone().gt_eq(other.expr), - }; - expr.into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Expr({})", self.expr)) - } - - fn __add__(&self, rhs: PyExpr) -> PyResult { - Ok((self.expr.clone() + rhs.expr).into()) - } - - fn __sub__(&self, rhs: PyExpr) -> PyResult { - Ok((self.expr.clone() - rhs.expr).into()) - } - - fn __truediv__(&self, rhs: PyExpr) -> PyResult { - Ok((self.expr.clone() / rhs.expr).into()) - } - - fn __mul__(&self, rhs: PyExpr) -> PyResult { - Ok((self.expr.clone() * rhs.expr).into()) - } - - fn __mod__(&self, rhs: PyExpr) -> PyResult { - let expr = self.expr.clone() % rhs.expr; - Ok(expr.into()) - } - - fn __and__(&self, rhs: PyExpr) -> PyResult { - Ok(self.expr.clone().and(rhs.expr).into()) - } - - fn __or__(&self, rhs: PyExpr) -> PyResult { - Ok(self.expr.clone().or(rhs.expr).into()) - } - - fn __invert__(&self) -> PyResult { - let expr = !self.expr.clone(); - Ok(expr.into()) - } - - fn __getitem__(&self, key: &str) -> PyResult { - Ok(self.expr.clone().field(key).into()) - } - - #[staticmethod] - pub fn literal(value: PyScalarValue) -> PyExpr { - lit(value.0).into() - } - - 
#[staticmethod] - pub fn literal_with_metadata( - value: PyScalarValue, - metadata: HashMap, - ) -> PyExpr { - let metadata = FieldMetadata::new(metadata.into_iter().collect()); - lit_with_metadata(value.0, Some(metadata)).into() - } - - #[staticmethod] - pub fn column(value: &str) -> PyExpr { - col(value).into() - } - - /// assign a name to the PyExpr - #[pyo3(signature = (name, metadata=None))] - pub fn alias(&self, name: &str, metadata: Option>) -> PyExpr { - let metadata = metadata.map(|m| FieldMetadata::new(m.into_iter().collect())); - self.expr.clone().alias_with_metadata(name, metadata).into() - } - - /// Create a sort PyExpr from an existing PyExpr. - #[pyo3(signature = (ascending=true, nulls_first=true))] - pub fn sort(&self, ascending: bool, nulls_first: bool) -> PySortExpr { - self.expr.clone().sort(ascending, nulls_first).into() - } - - pub fn is_null(&self) -> PyExpr { - self.expr.clone().is_null().into() - } - - pub fn is_not_null(&self) -> PyExpr { - self.expr.clone().is_not_null().into() - } - - pub fn cast(&self, to: PyArrowType) -> PyExpr { - // self.expr.cast_to() requires DFSchema to validate that the cast - // is supported, omit that for now - let expr = Expr::Cast(Cast::new(Box::new(self.expr.clone()), to.0)); - expr.into() - } - - #[pyo3(signature = (low, high, negated=false))] - pub fn between(&self, low: PyExpr, high: PyExpr, negated: bool) -> PyExpr { - let expr = Expr::Between(Between::new( - Box::new(self.expr.clone()), - negated, - Box::new(low.into()), - Box::new(high.into()), - )); - expr.into() - } - - /// A Rex (Row Expression) specifies a single row of data. That specification - /// could include user defined functions or types. RexType identifies the row - /// as one of the possible valid `RexTypes`. - pub fn rex_type(&self) -> PyResult { - Ok(match self.expr { - Expr::Alias(..) => RexType::Alias, - Expr::Column(..) => RexType::Reference, - Expr::ScalarVariable(..) | Expr::Literal(..) => RexType::Literal, - Expr::BinaryExpr { .. 
} - | Expr::Not(..) - | Expr::IsNotNull(..) - | Expr::Negative(..) - | Expr::IsNull(..) - | Expr::Like { .. } - | Expr::SimilarTo { .. } - | Expr::Between { .. } - | Expr::Case { .. } - | Expr::Cast { .. } - | Expr::TryCast { .. } - | Expr::ScalarFunction { .. } - | Expr::AggregateFunction { .. } - | Expr::WindowFunction { .. } - | Expr::InList { .. } - | Expr::Exists { .. } - | Expr::InSubquery { .. } - | Expr::GroupingSet(..) - | Expr::IsTrue(..) - | Expr::IsFalse(..) - | Expr::IsUnknown(_) - | Expr::IsNotTrue(..) - | Expr::IsNotFalse(..) - | Expr::Placeholder { .. } - | Expr::OuterReferenceColumn(_, _) - | Expr::Unnest(_) - | Expr::IsNotUnknown(_) - | Expr::SetComparison(_) => RexType::Call, - Expr::ScalarSubquery(..) => RexType::ScalarSubquery, - #[allow(deprecated)] - Expr::Wildcard { .. } => { - return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")); - } - }) - } - - /// Given the current `Expr` return the DataTypeMap which represents the - /// PythonType, Arrow DataType, and SqlType Enum which represents - pub fn types(&self) -> PyResult { - Self::_types(&self.expr) - } - - /// Extracts the Expr value into a Py that can be shared with Python - pub fn python_value<'py>(&self, py: Python<'py>) -> PyResult> { - match &self.expr { - Expr::Literal(scalar_value, _) => scalar_to_pyarrow(scalar_value, py), - _ => Err(py_type_err(format!( - "Non Expr::Literal encountered in types: {:?}", - &self.expr - ))), - } - } - - /// Row expressions, Rex(s), operate on the concept of operands. Different variants of Expressions, Expr(s), - /// store those operands in different datastructures. This function examines the Expr variant and returns - /// the operands to the calling logic as a Vec of PyExpr instances. - pub fn rex_call_operands(&self) -> PyResult> { - match &self.expr { - // Expr variants that are themselves the operand to return - Expr::Column(..) | Expr::ScalarVariable(..) | Expr::Literal(..) 
=> { - Ok(vec![PyExpr::from(self.expr.clone())]) - } - - Expr::Alias(alias) => Ok(vec![PyExpr::from(*alias.expr.clone())]), - - // Expr(s) that house the Expr instance to return in their bounded params - Expr::Not(expr) - | Expr::IsNull(expr) - | Expr::IsNotNull(expr) - | Expr::IsTrue(expr) - | Expr::IsFalse(expr) - | Expr::IsUnknown(expr) - | Expr::IsNotTrue(expr) - | Expr::IsNotFalse(expr) - | Expr::IsNotUnknown(expr) - | Expr::Negative(expr) - | Expr::Cast(Cast { expr, .. }) - | Expr::TryCast(TryCast { expr, .. }) - | Expr::InSubquery(InSubquery { expr, .. }) - | Expr::SetComparison(SetComparison { expr, .. }) => { - Ok(vec![PyExpr::from(*expr.clone())]) - } - - // Expr variants containing a collection of Expr(s) for operands - Expr::AggregateFunction(AggregateFunction { - params: AggregateFunctionParams { args, .. }, - .. - }) - | Expr::ScalarFunction(ScalarFunction { args, .. }) => { - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } - Expr::WindowFunction(boxed_window_fn) => { - let args = &boxed_window_fn.params.args; - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } - - // Expr(s) that require more specific processing - Expr::Case(Case { - expr, - when_then_expr, - else_expr, - }) => { - let mut operands: Vec = Vec::new(); - - if let Some(e) = expr { - for (when, then) in when_then_expr { - operands.push(PyExpr::from(Expr::BinaryExpr(BinaryExpr::new( - Box::new(*e.clone()), - Operator::Eq, - Box::new(*when.clone()), - )))); - operands.push(PyExpr::from(*then.clone())); - } - } else { - for (when, then) in when_then_expr { - operands.push(PyExpr::from(*when.clone())); - operands.push(PyExpr::from(*then.clone())); - } - }; - - if let Some(e) = else_expr { - operands.push(PyExpr::from(*e.clone())); - }; - - Ok(operands) - } - Expr::InList(InList { expr, list, .. 
}) => { - let mut operands: Vec = vec![PyExpr::from(*expr.clone())]; - for list_elem in list { - operands.push(PyExpr::from(list_elem.clone())); - } - - Ok(operands) - } - Expr::BinaryExpr(BinaryExpr { left, right, .. }) => Ok(vec![ - PyExpr::from(*left.clone()), - PyExpr::from(*right.clone()), - ]), - Expr::Like(Like { expr, pattern, .. }) => Ok(vec![ - PyExpr::from(*expr.clone()), - PyExpr::from(*pattern.clone()), - ]), - Expr::SimilarTo(Like { expr, pattern, .. }) => Ok(vec![ - PyExpr::from(*expr.clone()), - PyExpr::from(*pattern.clone()), - ]), - Expr::Between(Between { - expr, - negated: _, - low, - high, - }) => Ok(vec![ - PyExpr::from(*expr.clone()), - PyExpr::from(*low.clone()), - PyExpr::from(*high.clone()), - ]), - - // Currently un-support/implemented Expr types for Rex Call operations - Expr::GroupingSet(..) - | Expr::Unnest(_) - | Expr::OuterReferenceColumn(_, _) - | Expr::ScalarSubquery(..) - | Expr::Placeholder { .. } - | Expr::Exists { .. } => Err(py_runtime_err(format!( - "Unimplemented Expr type: {}", - self.expr - ))), - - #[allow(deprecated)] - Expr::Wildcard { .. } => { - Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) - } - } - } - - /// Extracts the operator associated with a RexType::Call - pub fn rex_call_operator(&self) -> PyResult { - Ok(match &self.expr { - Expr::BinaryExpr(BinaryExpr { - left: _, - op, - right: _, - }) => format!("{op}"), - Expr::ScalarFunction(ScalarFunction { func, args: _ }) => func.name().to_string(), - Expr::Cast { .. } => "cast".to_string(), - Expr::Between { .. } => "between".to_string(), - Expr::Case { .. } => "case".to_string(), - Expr::IsNull(..) => "is null".to_string(), - Expr::IsNotNull(..) 
=> "is not null".to_string(), - Expr::IsTrue(_) => "is true".to_string(), - Expr::IsFalse(_) => "is false".to_string(), - Expr::IsUnknown(_) => "is unknown".to_string(), - Expr::IsNotTrue(_) => "is not true".to_string(), - Expr::IsNotFalse(_) => "is not false".to_string(), - Expr::IsNotUnknown(_) => "is not unknown".to_string(), - Expr::InList { .. } => "in list".to_string(), - Expr::Negative(..) => "negative".to_string(), - Expr::Not(..) => "not".to_string(), - Expr::Like(Like { - negated, - case_insensitive, - .. - }) => { - let name = if *case_insensitive { "ilike" } else { "like" }; - if *negated { - format!("not {name}") - } else { - name.to_string() - } - } - Expr::SimilarTo(Like { negated, .. }) => { - if *negated { - "not similar to".to_string() - } else { - "similar to".to_string() - } - } - _ => { - return Err(py_type_err(format!( - "Catch all triggered in get_operator_name: {:?}", - &self.expr - ))); - } - }) - } - - pub fn column_name(&self, plan: PyLogicalPlan) -> PyResult { - self._column_name(&plan.plan()).map_err(py_runtime_err) - } - - // Expression Function Builder functions - - pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { - self.expr - .clone() - .order_by(to_sort_expressions(order_by)) - .into() - } - - pub fn filter(&self, filter: PyExpr) -> PyExprFuncBuilder { - self.expr.clone().filter(filter.expr.clone()).into() - } - - pub fn distinct(&self) -> PyExprFuncBuilder { - self.expr.clone().distinct().into() - } - - pub fn null_treatment(&self, null_treatment: NullTreatment) -> PyExprFuncBuilder { - self.expr - .clone() - .null_treatment(Some(null_treatment.into())) - .into() - } - - pub fn partition_by(&self, partition_by: Vec) -> PyExprFuncBuilder { - let partition_by = partition_by.iter().map(|e| e.expr.clone()).collect(); - self.expr.clone().partition_by(partition_by).into() - } - - pub fn window_frame(&self, window_frame: PyWindowFrame) -> PyExprFuncBuilder { - self.expr.clone().window_frame(window_frame.into()).into() - } - - 
#[pyo3(signature = (partition_by=None, window_frame=None, order_by=None, null_treatment=None))] - pub fn over( - &self, - partition_by: Option>, - window_frame: Option, - order_by: Option>, - null_treatment: Option, - ) -> PyDataFusionResult { - match &self.expr { - Expr::AggregateFunction(agg_fn) => { - let window_fn = Expr::WindowFunction(Box::new(WindowFunction::new( - WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), - agg_fn.params.args.clone(), - ))); - - add_builder_fns_to_window( - window_fn, - partition_by, - window_frame, - order_by, - null_treatment, - ) - } - Expr::WindowFunction(_) => add_builder_fns_to_window( - self.expr.clone(), - partition_by, - window_frame, - order_by, - null_treatment, - ), - _ => Err(datafusion::error::DataFusionError::Plan(format!( - "Using {} with `over` is not allowed. Must use an aggregate or window function.", - self.expr.variant_name() - )) - .into()), - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "ExprFuncBuilder", - module = "datafusion.expr", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyExprFuncBuilder { - pub builder: ExprFuncBuilder, -} - -impl From for PyExprFuncBuilder { - fn from(builder: ExprFuncBuilder) -> Self { - Self { builder } - } -} - -#[pymethods] -impl PyExprFuncBuilder { - pub fn order_by(&self, order_by: Vec) -> PyExprFuncBuilder { - self.builder - .clone() - .order_by(to_sort_expressions(order_by)) - .into() - } - - pub fn filter(&self, filter: PyExpr) -> PyExprFuncBuilder { - self.builder.clone().filter(filter.expr.clone()).into() - } - - pub fn distinct(&self) -> PyExprFuncBuilder { - self.builder.clone().distinct().into() - } - - pub fn null_treatment(&self, null_treatment: NullTreatment) -> PyExprFuncBuilder { - self.builder - .clone() - .null_treatment(Some(null_treatment.into())) - .into() - } - - pub fn partition_by(&self, partition_by: Vec) -> PyExprFuncBuilder { - let partition_by = partition_by.iter().map(|e| e.expr.clone()).collect(); - 
self.builder.clone().partition_by(partition_by).into() - } - - pub fn window_frame(&self, window_frame: PyWindowFrame) -> PyExprFuncBuilder { - self.builder - .clone() - .window_frame(window_frame.into()) - .into() - } - - pub fn build(&self) -> PyDataFusionResult { - Ok(self.builder.clone().build().map(|expr| expr.into())?) - } -} - -impl PyExpr { - pub fn _column_name(&self, plan: &LogicalPlan) -> PyDataFusionResult { - let field = Self::expr_to_field(&self.expr, plan)?; - Ok(field.name().to_owned()) - } - - /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against - pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { - let fields = exprlist_to_fields(std::slice::from_ref(expr), input_plan)?; - Ok(fields[0].1.clone()) - } - fn _types(expr: &Expr) -> PyResult { - match expr { - Expr::BinaryExpr(BinaryExpr { - left: _, - op, - right: _, - }) => match op { - Operator::Eq - | Operator::NotEq - | Operator::Lt - | Operator::LtEq - | Operator::Gt - | Operator::GtEq - | Operator::And - | Operator::Or - | Operator::IsDistinctFrom - | Operator::IsNotDistinctFrom - | Operator::RegexMatch - | Operator::RegexIMatch - | Operator::RegexNotMatch - | Operator::RegexNotIMatch - | Operator::LikeMatch - | Operator::ILikeMatch - | Operator::NotLikeMatch - | Operator::NotILikeMatch => DataTypeMap::map_from_arrow_type(&DataType::Boolean), - Operator::Plus | Operator::Minus | Operator::Multiply | Operator::Modulo => { - DataTypeMap::map_from_arrow_type(&DataType::Int64) - } - Operator::Divide => DataTypeMap::map_from_arrow_type(&DataType::Float64), - Operator::StringConcat => DataTypeMap::map_from_arrow_type(&DataType::Utf8), - Operator::BitwiseShiftLeft - | Operator::BitwiseShiftRight - | Operator::BitwiseXor - | Operator::BitwiseAnd - | Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary), - Operator::AtArrow - | Operator::ArrowAt - | Operator::Arrow - | Operator::LongArrow - | 
Operator::HashArrow - | Operator::HashLongArrow - | Operator::AtAt - | Operator::IntegerDivide - | Operator::HashMinus - | Operator::AtQuestion - | Operator::Question - | Operator::QuestionAnd - | Operator::QuestionPipe - | Operator::Colon => Err(py_type_err(format!("Unsupported expr: ${op}"))), - }, - Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), - Expr::Literal(scalar_value, _) => DataTypeMap::map_from_scalar_value(scalar_value), - _ => Err(py_type_err(format!( - "Non Expr::Literal encountered in types: {expr:?}" - ))), - } - } -} - -/// Initializes the `expr` module to match the pattern of `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - 
m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - Ok(()) -} diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs deleted file mode 100644 index 5a6a771a7..000000000 --- a/src/expr/aggregate.rs +++ /dev/null @@ -1,167 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::common::DataFusionError; -use datafusion::logical_expr::Expr; -use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias}; -use datafusion::logical_expr::logical_plan::Aggregate; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::errors::py_type_err; -use crate::expr::PyExpr; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Aggregate", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyAggregate { - aggregate: Aggregate, -} - -impl From for PyAggregate { - fn from(aggregate: Aggregate) -> PyAggregate { - PyAggregate { aggregate } - } -} - -impl TryFrom for Aggregate { - type Error = DataFusionError; - - fn try_from(agg: PyAggregate) -> Result { - Ok(agg.aggregate) - } -} - -impl Display for PyAggregate { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Aggregate - \nGroupBy(s): {:?} - \nAggregates(s): {:?} - \nInput: {:?} - \nProjected Schema: {:?}", - &self.aggregate.group_expr, - &self.aggregate.aggr_expr, - self.aggregate.input, - self.aggregate.schema - ) - } -} - -#[pymethods] -impl PyAggregate { - /// Retrieves the grouping expressions for this `Aggregate` - fn group_by_exprs(&self) -> PyResult> { - Ok(self - .aggregate - .group_expr - .iter() - .map(|e| PyExpr::from(e.clone())) - .collect()) - } - - /// Retrieves the aggregate expressions for this `Aggregate` - fn aggregate_exprs(&self) -> PyResult> { - Ok(self - .aggregate - .aggr_expr - .iter() - .map(|e| PyExpr::from(e.clone())) - .collect()) - } - - /// Returns the inner Aggregate Expr(s) - pub fn agg_expressions(&self) -> PyResult> { - Ok(self - .aggregate - .aggr_expr - .iter() - .map(|e| PyExpr::from(e.clone())) - .collect()) - } - - pub fn agg_func_name(&self, expr: PyExpr) -> PyResult { - Self::_agg_func_name(&expr.expr) - } 
- - pub fn aggregation_arguments(&self, expr: PyExpr) -> PyResult> { - self._aggregation_arguments(&expr.expr) - } - - // Retrieves the input `LogicalPlan` to this `Aggregate` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - // Resulting Schema for this `Aggregate` node instance - fn schema(&self) -> PyDFSchema { - (*self.aggregate.schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Aggregate({self})")) - } -} - -impl PyAggregate { - #[allow(clippy::only_used_in_recursion)] - fn _aggregation_arguments(&self, expr: &Expr) -> PyResult> { - match expr { - // TODO: This Alias logic seems to be returning some strange results that we should investigate - Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { - func: _, - params: AggregateFunctionParams { args, .. }, - .. - }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()), - _ => Err(py_type_err( - "Encountered a non Aggregate type in aggregation_arguments", - )), - } - } - - fn _agg_func_name(expr: &Expr) -> PyResult { - match expr { - Expr::Alias(Alias { expr, .. }) => Self::_agg_func_name(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { func, .. }) => Ok(func.name().to_owned()), - _ => Err(py_type_err( - "Encountered a non Aggregate type in agg_func_name", - )), - } - } -} - -impl LogicalNode for PyAggregate { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.aggregate.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs deleted file mode 100644 index 88e47999f..000000000 --- a/src/expr/aggregate_expr.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{Display, Formatter}; - -use datafusion::logical_expr::expr::AggregateFunction; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "AggregateFunction", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyAggregateFunction { - aggr: AggregateFunction, -} - -impl From for AggregateFunction { - fn from(aggr: PyAggregateFunction) -> Self { - aggr.aggr - } -} - -impl From for PyAggregateFunction { - fn from(aggr: AggregateFunction) -> PyAggregateFunction { - PyAggregateFunction { aggr } - } -} - -impl Display for PyAggregateFunction { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let args: Vec = self - .aggr - .params - .args - .iter() - .map(|expr| expr.to_string()) - .collect(); - write!(f, "{}({})", self.aggr.func.name(), args.join(", ")) - } -} - -#[pymethods] -impl PyAggregateFunction { - /// Get the aggregate type, such as "MIN", or "MAX" - fn aggregate_type(&self) -> String { - self.aggr.func.name().to_string() - } - - /// is this a distinct aggregate such as `COUNT(DISTINCT expr)` - fn is_distinct(&self) -> bool { - self.aggr.params.distinct - } - - /// Get the arguments to the aggregate function - fn args(&self) -> Vec { - self.aggr - .params - .args - .iter() - 
.map(|expr| PyExpr::from(expr.clone())) - .collect() - } - - /// Get a String representation of this column - fn __repr__(&self) -> String { - format!("{self}") - } -} diff --git a/src/expr/alias.rs b/src/expr/alias.rs deleted file mode 100644 index b76e82e22..000000000 --- a/src/expr/alias.rs +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::expr::Alias; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Alias", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyAlias { - alias: Alias, -} - -impl From for PyAlias { - fn from(alias: Alias) -> Self { - Self { alias } - } -} - -impl From for Alias { - fn from(py_alias: PyAlias) -> Self { - py_alias.alias - } -} - -impl Display for PyAlias { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Alias - \nExpr: `{:?}` - \nAlias Name: `{}`", - &self.alias.expr, &self.alias.name - ) - } -} - -#[pymethods] -impl PyAlias { - /// Retrieve the "name" of the alias - fn alias(&self) -> PyResult { - Ok(self.alias.name.clone()) - } - - fn expr(&self) -> PyResult { - Ok((*self.alias.expr.clone()).into()) - } - - /// Get a String representation of this column - fn __repr__(&self) -> String { - format!("{self}") - } -} diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs deleted file mode 100644 index 137765fe1..000000000 --- a/src/expr/analyze.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Analyze; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Analyze", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyAnalyze { - analyze: Analyze, -} - -impl PyAnalyze { - pub fn new(analyze: Analyze) -> Self { - Self { analyze } - } -} - -impl From for PyAnalyze { - fn from(analyze: Analyze) -> PyAnalyze { - PyAnalyze { analyze } - } -} - -impl From for Analyze { - fn from(analyze: PyAnalyze) -> Self { - analyze.analyze - } -} - -impl Display for PyAnalyze { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Analyze Table") - } -} - -#[pymethods] -impl PyAnalyze { - fn verbose(&self) -> PyResult { - Ok(self.analyze.verbose) - } - - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Analyze` node instance - fn schema(&self) -> PyResult { - Ok((*self.analyze.schema).clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Analyze({self})")) - } -} - -impl LogicalNode for PyAnalyze { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.analyze.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/between.rs b/src/expr/between.rs deleted file mode 100644 index 6943b6c3b..000000000 --- a/src/expr/between.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::expr::Between; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Between", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyBetween { - between: Between, -} - -impl From for Between { - fn from(between: PyBetween) -> Self { - between.between - } -} - -impl From for PyBetween { - fn from(between: Between) -> PyBetween { - PyBetween { between } - } -} - -impl Display for PyBetween { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Between - Expr: {:?} - Negated: {:?} - Low: {:?} - High: {:?}", - &self.between.expr, &self.between.negated, &self.between.low, &self.between.high - ) - } -} - -#[pymethods] -impl PyBetween { - fn expr(&self) -> PyResult { - Ok((*self.between.expr).clone().into()) - } - - fn negated(&self) -> PyResult { - Ok(self.between.negated) - } - - fn low(&self) -> PyResult { - Ok((*self.between.low).clone().into()) - } - - fn high(&self) -> PyResult { - Ok((*self.between.high).clone().into()) - } - - fn __repr__(&self) -> String { - format!("{self}") - } -} diff --git a/src/expr/binary_expr.rs b/src/expr/binary_expr.rs deleted file mode 100644 index 2326ba705..000000000 --- a/src/expr/binary_expr.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one 
-// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::BinaryExpr; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "BinaryExpr", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyBinaryExpr { - expr: BinaryExpr, -} - -impl From for BinaryExpr { - fn from(expr: PyBinaryExpr) -> Self { - expr.expr - } -} - -impl From for PyBinaryExpr { - fn from(expr: BinaryExpr) -> PyBinaryExpr { - PyBinaryExpr { expr } - } -} - -#[pymethods] -impl PyBinaryExpr { - fn left(&self) -> PyExpr { - self.expr.left.as_ref().clone().into() - } - - fn right(&self) -> PyExpr { - self.expr.right.as_ref().clone().into() - } - - fn op(&self) -> String { - format!("{}", self.expr.op) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("{}", self.expr)) - } -} diff --git a/src/expr/bool_expr.rs b/src/expr/bool_expr.rs deleted file mode 100644 index 9e374c7e2..000000000 --- a/src/expr/bool_expr.rs +++ /dev/null @@ -1,383 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::Expr; -use pyo3::prelude::*; - -use super::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Not", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyNot { - expr: Expr, -} - -impl PyNot { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyNot { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Not - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyNot { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsNotNull", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsNotNull { - expr: Expr, -} - -impl PyIsNotNull { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsNotNull { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsNotNull - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsNotNull { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsNull", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsNull { - expr: Expr, -} - -impl PyIsNull { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsNull { - fn 
fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsNull - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsNull { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsTrue", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsTrue { - expr: Expr, -} - -impl PyIsTrue { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsTrue { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsTrue - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsTrue { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsFalse", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsFalse { - expr: Expr, -} - -impl PyIsFalse { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsFalse { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsFalse - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsFalse { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsUnknown", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsUnknown { - expr: Expr, -} - -impl PyIsUnknown { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsUnknown { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsUnknown - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsUnknown { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsNotTrue", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsNotTrue { - expr: Expr, -} - -impl PyIsNotTrue { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for 
PyIsNotTrue { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsNotTrue - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsNotTrue { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsNotFalse", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsNotFalse { - expr: Expr, -} - -impl PyIsNotFalse { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsNotFalse { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsNotFalse - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsNotFalse { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "IsNotUnknown", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyIsNotUnknown { - expr: Expr, -} - -impl PyIsNotUnknown { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyIsNotUnknown { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "IsNotUnknown - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyIsNotUnknown { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Negative", - module = "datafusion.expr", - subclass -)] -#[derive(Clone, Debug)] -pub struct PyNegative { - expr: Expr, -} - -impl PyNegative { - pub fn new(expr: Expr) -> Self { - Self { expr } - } -} - -impl Display for PyNegative { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Negative - Expr: {}", - &self.expr - ) - } -} - -#[pymethods] -impl PyNegative { - fn expr(&self) -> PyResult { - Ok(self.expr.clone().into()) - } -} diff --git a/src/expr/case.rs b/src/expr/case.rs deleted file mode 100644 index 4f00449d8..000000000 --- a/src/expr/case.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation 
(ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::Case; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Case", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCase { - case: Case, -} - -impl From for Case { - fn from(case: PyCase) -> Self { - case.case - } -} - -impl From for PyCase { - fn from(case: Case) -> PyCase { - PyCase { case } - } -} - -#[pymethods] -impl PyCase { - fn expr(&self) -> Option { - self.case.expr.as_ref().map(|e| (**e).clone().into()) - } - - fn when_then_expr(&self) -> Vec<(PyExpr, PyExpr)> { - self.case - .when_then_expr - .iter() - .map(|e| ((*e.0).clone().into(), (*e.1).clone().into())) - .collect() - } - - fn else_expr(&self) -> Option { - self.case.else_expr.as_ref().map(|e| (**e).clone().into()) - } -} diff --git a/src/expr/cast.rs b/src/expr/cast.rs deleted file mode 100644 index 37d603538..000000000 --- a/src/expr/cast.rs +++ /dev/null @@ -1,86 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::{Cast, TryCast}; -use pyo3::prelude::*; - -use crate::common::data_type::PyDataType; -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Cast", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCast { - cast: Cast, -} - -impl From for Cast { - fn from(cast: PyCast) -> Self { - cast.cast - } -} - -impl From for PyCast { - fn from(cast: Cast) -> PyCast { - PyCast { cast } - } -} - -#[pymethods] -impl PyCast { - fn expr(&self) -> PyResult { - Ok((*self.cast.expr).clone().into()) - } - - fn data_type(&self) -> PyResult { - Ok(self.cast.data_type.clone().into()) - } -} - -#[pyclass(from_py_object, name = "TryCast", module = "datafusion.expr", subclass)] -#[derive(Clone)] -pub struct PyTryCast { - try_cast: TryCast, -} - -impl From for TryCast { - fn from(try_cast: PyTryCast) -> Self { - try_cast.try_cast - } -} - -impl From for PyTryCast { - fn from(try_cast: TryCast) -> PyTryCast { - PyTryCast { try_cast } - } -} - -#[pymethods] -impl PyTryCast { - fn expr(&self) -> PyResult { - Ok((*self.try_cast.expr).clone().into()) - } - - fn data_type(&self) -> PyResult { - Ok(self.try_cast.data_type.clone().into()) - } -} diff --git a/src/expr/column.rs b/src/expr/column.rs deleted file mode 100644 index c1238f98a..000000000 --- a/src/expr/column.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software 
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::common::Column; -use pyo3::prelude::*; - -#[pyclass( - from_py_object, - frozen, - name = "Column", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyColumn { - pub col: Column, -} - -impl PyColumn { - pub fn new(col: Column) -> Self { - Self { col } - } -} - -impl From for PyColumn { - fn from(col: Column) -> PyColumn { - PyColumn { col } - } -} - -#[pymethods] -impl PyColumn { - /// Get the column name - fn name(&self) -> String { - self.col.name.clone() - } - - /// Get the column relation - fn relation(&self) -> Option { - self.col.relation.as_ref().map(|r| format!("{r}")) - } - - /// Get the fully-qualified column name - fn qualified_name(&self) -> String { - self.col.flat_name() - } - - /// Get a String representation of this column - fn __repr__(&self) -> String { - self.qualified_name() - } -} diff --git a/src/expr/conditional_expr.rs b/src/expr/conditional_expr.rs deleted file mode 100644 index ea21fdb20..000000000 --- a/src/expr/conditional_expr.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::conditional_expressions::CaseBuilder; -use datafusion::prelude::Expr; -use pyo3::prelude::*; - -use crate::errors::PyDataFusionResult; -use crate::expr::PyExpr; - -// TODO(tsaucer) replace this all with CaseBuilder after it implements Clone -#[derive(Clone, Debug)] -#[pyclass( - from_py_object, - name = "CaseBuilder", - module = "datafusion.expr", - subclass, - frozen -)] -pub struct PyCaseBuilder { - expr: Option, - when: Vec, - then: Vec, -} - -#[pymethods] -impl PyCaseBuilder { - #[new] - pub fn new(expr: Option) -> Self { - Self { - expr: expr.map(Into::into), - when: vec![], - then: vec![], - } - } - - pub fn when(&self, when: PyExpr, then: PyExpr) -> PyCaseBuilder { - let mut case_builder = self.clone(); - case_builder.when.push(when.into()); - case_builder.then.push(then.into()); - - case_builder - } - - fn otherwise(&self, else_expr: PyExpr) -> PyDataFusionResult { - let case_builder = CaseBuilder::new( - self.expr.clone().map(Box::new), - self.when.clone(), - self.then.clone(), - Some(Box::new(else_expr.into())), - ); - - let expr = case_builder.end()?; - - Ok(expr.into()) - } - - fn end(&self) -> PyDataFusionResult { - let case_builder = CaseBuilder::new( - self.expr.clone().map(Box::new), - self.when.clone(), - self.then.clone(), - 
None, - ); - - let expr = case_builder.end()?; - - Ok(expr.into()) - } -} diff --git a/src/expr/copy_to.rs b/src/expr/copy_to.rs deleted file mode 100644 index 78e53cdff..000000000 --- a/src/expr/copy_to.rs +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::common::file_options::file_type::FileType; -use datafusion::logical_expr::dml::CopyTo; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CopyTo", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCopyTo { - copy: CopyTo, -} - -impl From for CopyTo { - fn from(copy: PyCopyTo) -> Self { - copy.copy - } -} - -impl From for PyCopyTo { - fn from(copy: CopyTo) -> PyCopyTo { - PyCopyTo { copy } - } -} - -impl Display for PyCopyTo { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "CopyTo: {:?}", self.copy.output_url) - } -} - -impl LogicalNode for PyCopyTo { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.copy.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyCopyTo { - #[new] - pub fn new( - input: PyLogicalPlan, - output_url: String, - partition_by: Vec, - file_type: PyFileType, - options: HashMap, - ) -> Self { - PyCopyTo { - copy: CopyTo::new( - input.plan(), - output_url, - partition_by, - file_type.file_type, - options, - ), - } - } - - fn input(&self) -> PyLogicalPlan { - PyLogicalPlan::from((*self.copy.input).clone()) - } - - fn output_url(&self) -> String { - self.copy.output_url.clone() - } - - fn partition_by(&self) -> Vec { - self.copy.partition_by.clone() - } - - fn file_type(&self) -> PyFileType { - PyFileType { - file_type: self.copy.file_type.clone(), - } - } - - fn options(&self) -> HashMap { - self.copy.options.clone() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CopyTo({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CopyTo".to_string()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "FileType", - module = 
"datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyFileType { - file_type: Arc, -} - -impl Display for PyFileType { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "FileType: {}", self.file_type) - } -} - -#[pymethods] -impl PyFileType { - fn __repr__(&self) -> PyResult { - Ok(format!("FileType({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("FileType".to_string()) - } -} diff --git a/src/expr/create_catalog.rs b/src/expr/create_catalog.rs deleted file mode 100644 index fa95980c0..000000000 --- a/src/expr/create_catalog.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::CreateCatalog; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateCatalog", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateCatalog { - create: CreateCatalog, -} - -impl From for CreateCatalog { - fn from(create: PyCreateCatalog) -> Self { - create.create - } -} - -impl From for PyCreateCatalog { - fn from(create: CreateCatalog) -> PyCreateCatalog { - PyCreateCatalog { create } - } -} - -impl Display for PyCreateCatalog { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "CreateCatalog: {:?}", self.create.catalog_name) - } -} - -#[pymethods] -impl PyCreateCatalog { - #[new] - pub fn new( - catalog_name: String, - if_not_exists: bool, - schema: PyDFSchema, - ) -> PyResult { - Ok(PyCreateCatalog { - create: CreateCatalog { - catalog_name, - if_not_exists, - schema: Arc::new(schema.into()), - }, - }) - } - - pub fn catalog_name(&self) -> String { - self.create.catalog_name.clone() - } - - pub fn if_not_exists(&self) -> bool { - self.create.if_not_exists - } - - pub fn schema(&self) -> PyDFSchema { - (*self.create.schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalog({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateCatalog".to_string()) - } -} - -impl LogicalNode for PyCreateCatalog { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_catalog_schema.rs b/src/expr/create_catalog_schema.rs deleted file mode 100644 index d836284a0..000000000 --- a/src/expr/create_catalog_schema.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under 
one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::CreateCatalogSchema; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateCatalogSchema", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateCatalogSchema { - create: CreateCatalogSchema, -} - -impl From for CreateCatalogSchema { - fn from(create: PyCreateCatalogSchema) -> Self { - create.create - } -} - -impl From for PyCreateCatalogSchema { - fn from(create: CreateCatalogSchema) -> PyCreateCatalogSchema { - PyCreateCatalogSchema { create } - } -} - -impl Display for PyCreateCatalogSchema { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "CreateCatalogSchema: {:?}", self.create.schema_name) - } -} - -#[pymethods] -impl PyCreateCatalogSchema { - #[new] - pub fn new( - schema_name: String, - if_not_exists: bool, - schema: PyDFSchema, - ) -> PyResult { - Ok(PyCreateCatalogSchema { - create: CreateCatalogSchema { - schema_name, - if_not_exists, - schema: Arc::new(schema.into()), - }, - }) - } - - pub fn 
schema_name(&self) -> String { - self.create.schema_name.clone() - } - - pub fn if_not_exists(&self) -> bool { - self.create.if_not_exists - } - - pub fn schema(&self) -> PyDFSchema { - (*self.create.schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalogSchema({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateCatalogSchema".to_string()) - } -} - -impl LogicalNode for PyCreateCatalogSchema { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_external_table.rs b/src/expr/create_external_table.rs deleted file mode 100644 index 980eea131..000000000 --- a/src/expr/create_external_table.rs +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::CreateExternalTable; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use super::sort_expr::PySortExpr; -use crate::common::df_schema::PyDFSchema; -use crate::common::schema::PyConstraints; -use crate::expr::PyExpr; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateExternalTable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateExternalTable { - create: CreateExternalTable, -} - -impl From for CreateExternalTable { - fn from(create: PyCreateExternalTable) -> Self { - create.create - } -} - -impl From for PyCreateExternalTable { - fn from(create: CreateExternalTable) -> PyCreateExternalTable { - PyCreateExternalTable { create } - } -} - -impl Display for PyCreateExternalTable { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CreateExternalTable: {:?}{}", - self.create.name, self.create.constraints - ) - } -} - -#[pymethods] -impl PyCreateExternalTable { - #[allow(clippy::too_many_arguments)] - #[new] - #[pyo3(signature = (schema, name, location, file_type, table_partition_cols, if_not_exists, or_replace, temporary, order_exprs, unbounded, options, constraints, column_defaults, definition=None))] - pub fn new( - schema: PyDFSchema, - name: String, - location: String, - file_type: String, - table_partition_cols: Vec, - if_not_exists: bool, - or_replace: bool, - temporary: bool, - order_exprs: Vec>, - unbounded: bool, - options: HashMap, - constraints: PyConstraints, - column_defaults: HashMap, - definition: Option, - ) -> Self { - let create = CreateExternalTable { - schema: Arc::new(schema.into()), - name: name.into(), - location, - file_type, - table_partition_cols, - if_not_exists, - or_replace, - temporary, - definition, - order_exprs: order_exprs - .into_iter() - .map(|vec| 
vec.into_iter().map(|s| s.into()).collect::>()) - .collect::>(), - unbounded, - options, - constraints: constraints.constraints, - column_defaults: column_defaults - .into_iter() - .map(|(k, v)| (k, v.into())) - .collect(), - }; - PyCreateExternalTable { create } - } - - pub fn schema(&self) -> PyDFSchema { - (*self.create.schema).clone().into() - } - - pub fn name(&self) -> PyResult { - Ok(self.create.name.to_string()) - } - - pub fn location(&self) -> String { - self.create.location.clone() - } - - pub fn file_type(&self) -> String { - self.create.file_type.clone() - } - - pub fn table_partition_cols(&self) -> Vec { - self.create.table_partition_cols.clone() - } - - pub fn if_not_exists(&self) -> bool { - self.create.if_not_exists - } - - pub fn temporary(&self) -> bool { - self.create.temporary - } - - pub fn definition(&self) -> Option { - self.create.definition.clone() - } - - pub fn order_exprs(&self) -> Vec> { - self.create - .order_exprs - .iter() - .map(|vec| vec.iter().map(|s| s.clone().into()).collect()) - .collect() - } - - pub fn unbounded(&self) -> bool { - self.create.unbounded - } - - pub fn options(&self) -> HashMap { - self.create.options.clone() - } - - pub fn constraints(&self) -> PyConstraints { - PyConstraints { - constraints: self.create.constraints.clone(), - } - } - - pub fn column_defaults(&self) -> HashMap { - self.create - .column_defaults - .iter() - .map(|(k, v)| (k.clone(), v.clone().into())) - .collect() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CreateExternalTable({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateExternalTable".to_string()) - } -} - -impl LogicalNode for PyCreateExternalTable { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_function.rs b/src/expr/create_function.rs deleted file mode 100644 index 622858913..000000000 --- a/src/expr/create_function.rs +++ /dev/null 
@@ -1,207 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::{ - CreateFunction, CreateFunctionBody, OperateFunctionArg, Volatility, -}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::PyExpr; -use super::logical_node::LogicalNode; -use crate::common::data_type::PyDataType; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateFunction", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateFunction { - create: CreateFunction, -} - -impl From for CreateFunction { - fn from(create: PyCreateFunction) -> Self { - create.create - } -} - -impl From for PyCreateFunction { - fn from(create: CreateFunction) -> PyCreateFunction { - PyCreateFunction { create } - } -} - -impl Display for PyCreateFunction { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "CreateFunction: name {:?}", self.create.name) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "OperateFunctionArg", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyOperateFunctionArg { - 
arg: OperateFunctionArg, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "Volatility", - module = "datafusion.expr" -)] -pub enum PyVolatility { - Immutable, - Stable, - Volatile, -} - -#[pyclass( - from_py_object, - frozen, - name = "CreateFunctionBody", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateFunctionBody { - body: CreateFunctionBody, -} - -#[pymethods] -impl PyCreateFunctionBody { - pub fn language(&self) -> Option { - self.body - .language - .as_ref() - .map(|language| language.to_string()) - } - - pub fn behavior(&self) -> Option { - self.body.behavior.as_ref().map(|behavior| match behavior { - Volatility::Immutable => PyVolatility::Immutable, - Volatility::Stable => PyVolatility::Stable, - Volatility::Volatile => PyVolatility::Volatile, - }) - } - - pub fn function_body(&self) -> Option { - self.body - .function_body - .as_ref() - .map(|function_body| function_body.clone().into()) - } -} - -#[pymethods] -impl PyCreateFunction { - #[new] - #[pyo3(signature = (or_replace, temporary, name, params, schema, return_type=None, args=None))] - pub fn new( - or_replace: bool, - temporary: bool, - name: String, - params: PyCreateFunctionBody, - schema: PyDFSchema, - return_type: Option, - args: Option>, - ) -> Self { - PyCreateFunction { - create: CreateFunction { - or_replace, - temporary, - name, - args: args.map(|args| args.into_iter().map(|arg| arg.arg).collect()), - return_type: return_type.map(|return_type| return_type.data_type), - params: params.body, - schema: Arc::new(schema.into()), - }, - } - } - - pub fn or_replace(&self) -> bool { - self.create.or_replace - } - - pub fn temporary(&self) -> bool { - self.create.temporary - } - - pub fn name(&self) -> String { - self.create.name.clone() - } - - pub fn params(&self) -> PyCreateFunctionBody { - PyCreateFunctionBody { - body: self.create.params.clone(), - } - } - - pub fn schema(&self) 
-> PyDFSchema { - (*self.create.schema).clone().into() - } - - pub fn return_type(&self) -> Option { - self.create - .return_type - .as_ref() - .map(|return_type| return_type.clone().into()) - } - - pub fn args(&self) -> Option> { - self.create.args.as_ref().map(|args| { - args.iter() - .map(|arg| PyOperateFunctionArg { arg: arg.clone() }) - .collect() - }) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CreateFunction({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateFunction".to_string()) - } -} - -impl LogicalNode for PyCreateFunction { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_index.rs b/src/expr/create_index.rs deleted file mode 100644 index 5f9bd11e8..000000000 --- a/src/expr/create_index.rs +++ /dev/null @@ -1,135 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::CreateIndex; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use super::sort_expr::PySortExpr; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateIndex", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateIndex { - create: CreateIndex, -} - -impl From for CreateIndex { - fn from(create: PyCreateIndex) -> Self { - create.create - } -} - -impl From for PyCreateIndex { - fn from(create: CreateIndex) -> PyCreateIndex { - PyCreateIndex { create } - } -} - -impl Display for PyCreateIndex { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "CreateIndex: {:?}", self.create.name) - } -} - -#[pymethods] -impl PyCreateIndex { - #[new] - #[pyo3(signature = (table, columns, unique, if_not_exists, schema, name=None, using=None))] - pub fn new( - table: String, - columns: Vec, - unique: bool, - if_not_exists: bool, - schema: PyDFSchema, - name: Option, - using: Option, - ) -> PyResult { - Ok(PyCreateIndex { - create: CreateIndex { - name, - table: table.into(), - using, - columns: columns.iter().map(|c| c.clone().into()).collect(), - unique, - if_not_exists, - schema: Arc::new(schema.into()), - }, - }) - } - - pub fn name(&self) -> Option { - self.create.name.clone() - } - - pub fn table(&self) -> PyResult { - Ok(self.create.table.to_string()) - } - - pub fn using(&self) -> Option { - self.create.using.clone() - } - - pub fn columns(&self) -> Vec { - self.create - .columns - .iter() - .map(|c| c.clone().into()) - .collect() - } - - pub fn unique(&self) -> bool { - self.create.unique - } - - pub fn if_not_exists(&self) -> bool { - self.create.if_not_exists - } - - pub fn schema(&self) -> PyDFSchema { - (*self.create.schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - 
Ok(format!("CreateIndex({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateIndex".to_string()) - } -} - -impl LogicalNode for PyCreateIndex { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs deleted file mode 100644 index 3214dab0e..000000000 --- a/src/expr/create_memory_table.rs +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::CreateMemoryTable; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateMemoryTable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateMemoryTable { - create: CreateMemoryTable, -} - -impl From for CreateMemoryTable { - fn from(create: PyCreateMemoryTable) -> Self { - create.create - } -} - -impl From for PyCreateMemoryTable { - fn from(create: CreateMemoryTable) -> PyCreateMemoryTable { - PyCreateMemoryTable { create } - } -} - -impl Display for PyCreateMemoryTable { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CreateMemoryTable - Name: {:?} - Input: {:?} - if_not_exists: {:?} - or_replace: {:?}", - &self.create.name, - &self.create.input, - &self.create.if_not_exists, - &self.create.or_replace, - ) - } -} - -#[pymethods] -impl PyCreateMemoryTable { - fn name(&self) -> PyResult { - Ok(self.create.name.to_string()) - } - - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn if_not_exists(&self) -> bool { - self.create.if_not_exists - } - - fn or_replace(&self) -> bool { - self.create.or_replace - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CreateMemoryTable({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateMemoryTable".to_string()) - } -} - -impl LogicalNode for PyCreateMemoryTable { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.create.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs deleted file mode 100644 index 6941ef769..000000000 --- a/src/expr/create_view.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::{CreateView, DdlStatement, LogicalPlan}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::errors::py_type_err; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "CreateView", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyCreateView { - create: CreateView, -} - -impl From for CreateView { - fn from(create: PyCreateView) -> Self { - create.create - } -} - -impl From for PyCreateView { - fn from(create: CreateView) -> PyCreateView { - PyCreateView { create } - } -} - -impl Display for PyCreateView { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CreateView - name: {:?} - input: {:?} - or_replace: {:?} - definition: {:?}", - &self.create.name, &self.create.input, &self.create.or_replace, &self.create.definition, - ) - } -} - -#[pymethods] -impl PyCreateView { - fn name(&self) -> PyResult { - Ok(self.create.name.to_string()) - } - - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn or_replace(&self) -> bool { - self.create.or_replace - } - - fn definition(&self) -> PyResult> { - Ok(self.create.definition.clone()) - } - - fn 
__repr__(&self) -> PyResult { - Ok(format!("CreateView({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("CreateView".to_string()) - } -} - -impl LogicalNode for PyCreateView { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.create.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -impl TryFrom for PyCreateView { - type Error = PyErr; - - fn try_from(logical_plan: LogicalPlan) -> Result { - match logical_plan { - LogicalPlan::Ddl(DdlStatement::CreateView(create)) => Ok(PyCreateView { create }), - _ => Err(py_type_err("unexpected plan")), - } - } -} diff --git a/src/expr/describe_table.rs b/src/expr/describe_table.rs deleted file mode 100644 index 73955bb34..000000000 --- a/src/expr/describe_table.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use arrow::datatypes::Schema; -use arrow::pyarrow::PyArrowType; -use datafusion::logical_expr::DescribeTable; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DescribeTable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDescribeTable { - describe: DescribeTable, -} - -impl Display for PyDescribeTable { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "DescribeTable") - } -} - -#[pymethods] -impl PyDescribeTable { - #[new] - fn new(schema: PyArrowType, output_schema: PyDFSchema) -> Self { - Self { - describe: DescribeTable { - schema: Arc::new(schema.0), - output_schema: Arc::new(output_schema.into()), - }, - } - } - - pub fn schema(&self) -> PyArrowType { - (*self.describe.schema).clone().into() - } - - pub fn output_schema(&self) -> PyDFSchema { - (*self.describe.output_schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("DescribeTable({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("DescribeTable".to_string()) - } -} - -impl From for DescribeTable { - fn from(describe: PyDescribeTable) -> Self { - describe.describe - } -} - -impl From for PyDescribeTable { - fn from(describe: DescribeTable) -> PyDescribeTable { - PyDescribeTable { describe } - } -} - -impl LogicalNode for PyDescribeTable { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs deleted file mode 100644 index 68c2a17fe..000000000 --- a/src/expr/distinct.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::Distinct; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Distinct", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDistinct { - distinct: Distinct, -} - -impl From for Distinct { - fn from(distinct: PyDistinct) -> Self { - distinct.distinct - } -} - -impl From for PyDistinct { - fn from(distinct: Distinct) -> PyDistinct { - PyDistinct { distinct } - } -} - -impl Display for PyDistinct { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match &self.distinct { - Distinct::All(input) => write!( - f, - "Distinct ALL - \nInput: {input:?}", - ), - Distinct::On(distinct_on) => { - write!( - f, - "Distinct ON - \nInput: {:?}", - distinct_on.input, - ) - } - } - } -} - -#[pymethods] -impl PyDistinct { - /// Retrieves the input `LogicalPlan` to this `Projection` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Distinct({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Distinct".to_string()) - } -} - -impl LogicalNode for PyDistinct { - fn inputs(&self) -> Vec { - 
match &self.distinct { - Distinct::All(input) => vec![PyLogicalPlan::from(input.as_ref().clone())], - Distinct::On(distinct_on) => { - vec![PyLogicalPlan::from(distinct_on.input.as_ref().clone())] - } - } - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/dml.rs b/src/expr/dml.rs deleted file mode 100644 index 26f975820..000000000 --- a/src/expr/dml.rs +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::dml::InsertOp; -use datafusion::logical_expr::{DmlStatement, WriteOp}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::common::schema::PyTableSource; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DmlStatement", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDmlStatement { - dml: DmlStatement, -} - -impl From for DmlStatement { - fn from(dml: PyDmlStatement) -> Self { - dml.dml - } -} - -impl From for PyDmlStatement { - fn from(dml: DmlStatement) -> PyDmlStatement { - PyDmlStatement { dml } - } -} - -impl LogicalNode for PyDmlStatement { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.dml.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyDmlStatement { - pub fn table_name(&self) -> PyResult { - Ok(self.dml.table_name.to_string()) - } - - pub fn target(&self) -> PyResult { - Ok(PyTableSource { - table_source: self.dml.target.clone(), - }) - } - - pub fn op(&self) -> PyWriteOp { - self.dml.op.clone().into() - } - - pub fn input(&self) -> PyLogicalPlan { - PyLogicalPlan { - plan: self.dml.input.clone(), - } - } - - pub fn output_schema(&self) -> PyDFSchema { - (*self.dml.output_schema).clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok("DmlStatement".to_string()) - } - - fn __name__(&self) -> PyResult { - Ok("DmlStatement".to_string()) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - eq, - eq_int, - name = "WriteOp", - module = "datafusion.expr" -)] -pub enum PyWriteOp { - Append, - Overwrite, - Replace, - Update, - Delete, - Ctas, - Truncate, -} - -impl From for PyWriteOp { - fn from(write_op: WriteOp) -> Self { - match write_op { - WriteOp::Insert(InsertOp::Append) => 
PyWriteOp::Append, - WriteOp::Insert(InsertOp::Overwrite) => PyWriteOp::Overwrite, - WriteOp::Insert(InsertOp::Replace) => PyWriteOp::Replace, - WriteOp::Update => PyWriteOp::Update, - WriteOp::Delete => PyWriteOp::Delete, - WriteOp::Ctas => PyWriteOp::Ctas, - WriteOp::Truncate => PyWriteOp::Truncate, - } - } -} - -impl From for WriteOp { - fn from(py: PyWriteOp) -> Self { - match py { - PyWriteOp::Append => WriteOp::Insert(InsertOp::Append), - PyWriteOp::Overwrite => WriteOp::Insert(InsertOp::Overwrite), - PyWriteOp::Replace => WriteOp::Insert(InsertOp::Replace), - PyWriteOp::Update => WriteOp::Update, - PyWriteOp::Delete => WriteOp::Delete, - PyWriteOp::Ctas => WriteOp::Ctas, - PyWriteOp::Truncate => WriteOp::Truncate, - } - } -} - -#[pymethods] -impl PyWriteOp { - fn name(&self) -> String { - let write_op: WriteOp = self.clone().into(); - write_op.name().to_string() - } -} diff --git a/src/expr/drop_catalog_schema.rs b/src/expr/drop_catalog_schema.rs deleted file mode 100644 index fd5105332..000000000 --- a/src/expr/drop_catalog_schema.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::common::SchemaReference; -use datafusion::logical_expr::DropCatalogSchema; -use datafusion::sql::TableReference; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DropCatalogSchema", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDropCatalogSchema { - drop: DropCatalogSchema, -} - -impl From for DropCatalogSchema { - fn from(drop: PyDropCatalogSchema) -> Self { - drop.drop - } -} - -impl From for PyDropCatalogSchema { - fn from(drop: DropCatalogSchema) -> PyDropCatalogSchema { - PyDropCatalogSchema { drop } - } -} - -impl Display for PyDropCatalogSchema { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "DropCatalogSchema") - } -} - -fn parse_schema_reference(name: String) -> PyResult { - match name.into() { - TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table }), - TableReference::Partial { schema, table } => Ok(SchemaReference::Full { - schema: table, - catalog: schema, - }), - TableReference::Full { - catalog: _, - schema: _, - table: _, - } => Err(PyErr::new::( - "Invalid schema specifier (has 3 parts)".to_string(), - )), - } -} - -#[pymethods] -impl PyDropCatalogSchema { - #[new] - fn new(name: String, schema: PyDFSchema, if_exists: bool, cascade: bool) -> PyResult { - let name = parse_schema_reference(name)?; - Ok(PyDropCatalogSchema { - drop: DropCatalogSchema { - name, - schema: Arc::new(schema.into()), - if_exists, - cascade, - }, - }) - } - - fn name(&self) -> PyResult { - Ok(self.drop.name.to_string()) - } - - fn schema(&self) -> PyDFSchema { - (*self.drop.schema).clone().into() - } - - fn if_exists(&self) -> PyResult { - Ok(self.drop.if_exists) - } - - fn cascade(&self) -> PyResult { - 
Ok(self.drop.cascade) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("DropCatalogSchema({self})")) - } -} - -impl LogicalNode for PyDropCatalogSchema { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/drop_function.rs b/src/expr/drop_function.rs deleted file mode 100644 index 0599dd49e..000000000 --- a/src/expr/drop_function.rs +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::DropFunction; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DropFunction", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDropFunction { - drop: DropFunction, -} - -impl From for DropFunction { - fn from(drop: PyDropFunction) -> Self { - drop.drop - } -} - -impl From for PyDropFunction { - fn from(drop: DropFunction) -> PyDropFunction { - PyDropFunction { drop } - } -} - -impl Display for PyDropFunction { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "DropFunction") - } -} - -#[pymethods] -impl PyDropFunction { - #[new] - fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { - Ok(PyDropFunction { - drop: DropFunction { - name, - schema: Arc::new(schema.into()), - if_exists, - }, - }) - } - fn name(&self) -> PyResult { - Ok(self.drop.name.clone()) - } - - fn schema(&self) -> PyDFSchema { - (*self.drop.schema).clone().into() - } - - fn if_exists(&self) -> PyResult { - Ok(self.drop.if_exists) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("DropFunction({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("DropFunction".to_string()) - } -} - -impl LogicalNode for PyDropFunction { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs deleted file mode 100644 index 46fe67465..000000000 --- a/src/expr/drop_table.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::DropTable; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DropTable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDropTable { - drop: DropTable, -} - -impl From for DropTable { - fn from(drop: PyDropTable) -> Self { - drop.drop - } -} - -impl From for PyDropTable { - fn from(drop: DropTable) -> PyDropTable { - PyDropTable { drop } - } -} - -impl Display for PyDropTable { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "DropTable - name: {:?} - if_exists: {:?} - schema: {:?}", - &self.drop.name, &self.drop.if_exists, &self.drop.schema, - ) - } -} - -#[pymethods] -impl PyDropTable { - fn name(&self) -> PyResult { - Ok(self.drop.name.to_string()) - } - - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn if_exists(&self) -> bool { - self.drop.if_exists - } - - fn __repr__(&self) -> PyResult { - Ok(format!("DropTable({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("DropTable".to_string()) - } -} - -impl LogicalNode for PyDropTable { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff 
--git a/src/expr/drop_view.rs b/src/expr/drop_view.rs deleted file mode 100644 index 0d0c51f13..000000000 --- a/src/expr/drop_view.rs +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; -use std::sync::Arc; - -use datafusion::logical_expr::DropView; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "DropView", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDropView { - drop: DropView, -} - -impl From for DropView { - fn from(drop: PyDropView) -> Self { - drop.drop - } -} - -impl From for PyDropView { - fn from(drop: DropView) -> PyDropView { - PyDropView { drop } - } -} - -impl Display for PyDropView { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "DropView: {name:?} if not exist:={if_exists}", - name = self.drop.name, - if_exists = self.drop.if_exists - ) - } -} - -#[pymethods] -impl PyDropView { - #[new] - fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { - Ok(PyDropView { - drop: DropView { - 
name: name.into(), - schema: Arc::new(schema.into()), - if_exists, - }, - }) - } - - fn name(&self) -> PyResult { - Ok(self.drop.name.to_string()) - } - - fn schema(&self) -> PyDFSchema { - (*self.drop.schema).clone().into() - } - - fn if_exists(&self) -> PyResult { - Ok(self.drop.if_exists) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("DropView({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("DropView".to_string()) - } -} - -impl LogicalNode for PyDropView { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs deleted file mode 100644 index f3c237731..000000000 --- a/src/expr/empty_relation.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::EmptyRelation; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "EmptyRelation", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyEmptyRelation { - empty: EmptyRelation, -} - -impl From for EmptyRelation { - fn from(empty_relation: PyEmptyRelation) -> Self { - empty_relation.empty - } -} - -impl From for PyEmptyRelation { - fn from(empty: EmptyRelation) -> PyEmptyRelation { - PyEmptyRelation { empty } - } -} - -impl Display for PyEmptyRelation { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Empty Relation - Produce One Row: {:?} - Schema: {:?}", - &self.empty.produce_one_row, &self.empty.schema - ) - } -} - -#[pymethods] -impl PyEmptyRelation { - fn produce_one_row(&self) -> PyResult { - Ok(self.empty.produce_one_row) - } - - /// Resulting Schema for this `EmptyRelation` node instance - fn schema(&self) -> PyResult { - Ok((*self.empty.schema).clone().into()) - } - - /// Get a String representation of this column - fn __repr__(&self) -> String { - format!("{self}") - } - - fn __name__(&self) -> PyResult { - Ok("EmptyRelation".to_string()) - } -} - -impl LogicalNode for PyEmptyRelation { - fn inputs(&self) -> Vec { - // table scans are leaf nodes and do not have inputs - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/exists.rs b/src/expr/exists.rs deleted file mode 100644 index d2e816127..000000000 --- a/src/expr/exists.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::expr::Exists; -use pyo3::prelude::*; - -use super::subquery::PySubquery; - -#[pyclass( - from_py_object, - frozen, - name = "Exists", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyExists { - exists: Exists, -} - -impl From for PyExists { - fn from(exists: Exists) -> Self { - PyExists { exists } - } -} - -#[pymethods] -impl PyExists { - fn subquery(&self) -> PySubquery { - self.exists.subquery.clone().into() - } - - fn negated(&self) -> bool { - self.exists.negated - } -} diff --git a/src/expr/explain.rs b/src/expr/explain.rs deleted file mode 100644 index 6259951de..000000000 --- a/src/expr/explain.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::LogicalPlan; -use datafusion::logical_expr::logical_plan::Explain; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::errors::py_type_err; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Explain", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyExplain { - explain: Explain, -} - -impl From for Explain { - fn from(explain: PyExplain) -> Self { - explain.explain - } -} - -impl From for PyExplain { - fn from(explain: Explain) -> PyExplain { - PyExplain { explain } - } -} - -impl Display for PyExplain { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Explain - verbose: {:?} - plan: {:?} - stringified_plans: {:?} - schema: {:?} - logical_optimization_succeeded: {:?}", - &self.explain.verbose, - &self.explain.plan, - &self.explain.stringified_plans, - &self.explain.schema, - &self.explain.logical_optimization_succeeded - ) - } -} - -#[pymethods] -impl PyExplain { - fn explain_string(&self) -> PyResult> { - let mut string_plans: Vec = Vec::new(); - for stringified_plan in &self.explain.stringified_plans { - string_plans.push((*stringified_plan.plan).clone()); - } - Ok(string_plans) - } - - fn verbose(&self) -> bool { - self.explain.verbose - } - - fn plan(&self) -> PyResult { - Ok(PyLogicalPlan::from((*self.explain.plan).clone())) - } - - fn schema(&self) -> PyDFSchema { - (*self.explain.schema).clone().into() - } - - fn logical_optimization_succceeded(&self) -> bool { - self.explain.logical_optimization_succeeded - } -} - -impl TryFrom for PyExplain { - type Error = PyErr; - - fn try_from(logical_plan: LogicalPlan) -> Result { - match logical_plan { - LogicalPlan::Explain(explain) => Ok(PyExplain { 
explain }), - _ => Err(py_type_err("unexpected plan")), - } - } -} - -impl LogicalNode for PyExplain { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/extension.rs b/src/expr/extension.rs deleted file mode 100644 index a0b617565..000000000 --- a/src/expr/extension.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::Extension; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Extension", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyExtension { - pub node: Extension, -} - -impl From for PyExtension { - fn from(node: Extension) -> PyExtension { - PyExtension { node } - } -} - -#[pymethods] -impl PyExtension { - fn name(&self) -> PyResult { - Ok(self.node.node.name().to_string()) - } -} - -impl LogicalNode for PyExtension { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/filter.rs b/src/expr/filter.rs deleted file mode 100644 index 67426806d..000000000 --- a/src/expr/filter.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Filter; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::PyExpr; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Filter", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyFilter { - filter: Filter, -} - -impl From for PyFilter { - fn from(filter: Filter) -> PyFilter { - PyFilter { filter } - } -} - -impl From for Filter { - fn from(filter: PyFilter) -> Self { - filter.filter - } -} - -impl Display for PyFilter { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Filter - Predicate: {:?} - Input: {:?}", - &self.filter.predicate, &self.filter.input - ) - } -} - -#[pymethods] -impl PyFilter { - /// Retrieves the predicate expression for this `Filter` - fn predicate(&self) -> PyExpr { - PyExpr::from(self.filter.predicate.clone()) - } - - /// Retrieves the input `LogicalPlan` to this `Filter` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Filter` node instance - fn schema(&self) -> PyDFSchema { - self.filter.input.schema().as_ref().clone().into() - } - - fn __repr__(&self) -> String { - format!("Filter({self})") - } -} - -impl LogicalNode for PyFilter { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.filter.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/grouping_set.rs b/src/expr/grouping_set.rs deleted file mode 100644 index 549a866ed..000000000 --- a/src/expr/grouping_set.rs +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::GroupingSet; -use pyo3::prelude::*; - -#[pyclass( - from_py_object, - frozen, - name = "GroupingSet", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyGroupingSet { - grouping_set: GroupingSet, -} - -impl From for GroupingSet { - fn from(grouping_set: PyGroupingSet) -> Self { - grouping_set.grouping_set - } -} - -impl From for PyGroupingSet { - fn from(grouping_set: GroupingSet) -> PyGroupingSet { - PyGroupingSet { grouping_set } - } -} diff --git a/src/expr/in_list.rs b/src/expr/in_list.rs deleted file mode 100644 index 0612cc21e..000000000 --- a/src/expr/in_list.rs +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::expr::InList; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "InList", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyInList { - in_list: InList, -} - -impl From for PyInList { - fn from(in_list: InList) -> Self { - PyInList { in_list } - } -} - -#[pymethods] -impl PyInList { - fn expr(&self) -> PyExpr { - (*self.in_list.expr).clone().into() - } - - fn list(&self) -> Vec { - self.in_list.list.iter().map(|e| e.clone().into()).collect() - } - - fn negated(&self) -> bool { - self.in_list.negated - } -} diff --git a/src/expr/in_subquery.rs b/src/expr/in_subquery.rs deleted file mode 100644 index 81a2c5794..000000000 --- a/src/expr/in_subquery.rs +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::logical_expr::expr::InSubquery; -use pyo3::prelude::*; - -use super::PyExpr; -use super::subquery::PySubquery; - -#[pyclass( - from_py_object, - frozen, - name = "InSubquery", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyInSubquery { - in_subquery: InSubquery, -} - -impl From for PyInSubquery { - fn from(in_subquery: InSubquery) -> Self { - PyInSubquery { in_subquery } - } -} - -#[pymethods] -impl PyInSubquery { - fn expr(&self) -> PyExpr { - (*self.in_subquery.expr).clone().into() - } - - fn subquery(&self) -> PySubquery { - self.in_subquery.subquery.clone().into() - } - - fn negated(&self) -> bool { - self.in_subquery.negated - } -} diff --git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs deleted file mode 100644 index 98a90d8d4..000000000 --- a/src/expr/indexed_field.rs +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{Display, Formatter}; - -use datafusion::logical_expr::expr::{GetFieldAccess, GetIndexedField}; -use pyo3::prelude::*; - -use super::literal::PyLiteral; -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "GetIndexedField", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyGetIndexedField { - indexed_field: GetIndexedField, -} - -impl From for GetIndexedField { - fn from(indexed_field: PyGetIndexedField) -> Self { - indexed_field.indexed_field - } -} - -impl From for PyGetIndexedField { - fn from(indexed_field: GetIndexedField) -> PyGetIndexedField { - PyGetIndexedField { indexed_field } - } -} - -impl Display for PyGetIndexedField { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "GetIndexedField - Expr: {:?} - Key: {:?}", - &self.indexed_field.expr, &self.indexed_field.field - ) - } -} - -#[pymethods] -impl PyGetIndexedField { - fn expr(&self) -> PyResult { - Ok((*self.indexed_field.expr).clone().into()) - } - - fn key(&self) -> PyResult { - match &self.indexed_field.field { - GetFieldAccess::NamedStructField { name, .. } => Ok(name.clone().into()), - _ => todo!(), - } - } - - /// Get a String representation of this column - fn __repr__(&self) -> String { - format!("{}", self) - } -} diff --git a/src/expr/join.rs b/src/expr/join.rs deleted file mode 100644 index b90f2f57d..000000000 --- a/src/expr/join.rs +++ /dev/null @@ -1,217 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::common::NullEquality; -use datafusion::logical_expr::logical_plan::{Join, JoinConstraint, JoinType}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::PyExpr; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -#[pyclass(from_py_object, frozen, name = "JoinType", module = "datafusion.expr")] -pub struct PyJoinType { - join_type: JoinType, -} - -impl From for PyJoinType { - fn from(join_type: JoinType) -> PyJoinType { - PyJoinType { join_type } - } -} - -impl From for JoinType { - fn from(join_type: PyJoinType) -> Self { - join_type.join_type - } -} - -#[pymethods] -impl PyJoinType { - pub fn is_outer(&self) -> bool { - self.join_type.is_outer() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("{}", self.join_type)) - } -} - -impl Display for PyJoinType { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{}", self.join_type) - } -} - -#[derive(Debug, Clone, Copy)] -#[pyclass( - from_py_object, - frozen, - name = "JoinConstraint", - module = "datafusion.expr" -)] -pub struct PyJoinConstraint { - join_constraint: JoinConstraint, -} - -impl From for PyJoinConstraint { - fn from(join_constraint: JoinConstraint) -> PyJoinConstraint { - PyJoinConstraint { join_constraint } - } -} - -impl From for JoinConstraint { - fn from(join_constraint: PyJoinConstraint) -> Self { - join_constraint.join_constraint - } -} - -#[pymethods] -impl 
PyJoinConstraint { - fn __repr__(&self) -> PyResult { - match self.join_constraint { - JoinConstraint::On => Ok("On".to_string()), - JoinConstraint::Using => Ok("Using".to_string()), - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Join", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyJoin { - join: Join, -} - -impl From for PyJoin { - fn from(join: Join) -> PyJoin { - PyJoin { join } - } -} - -impl From for Join { - fn from(join: PyJoin) -> Self { - join.join - } -} - -impl Display for PyJoin { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Join - Left: {:?} - Right: {:?} - On: {:?} - Filter: {:?} - JoinType: {:?} - JoinConstraint: {:?} - Schema: {:?} - NullEquality: {:?}", - &self.join.left, - &self.join.right, - &self.join.on, - &self.join.filter, - &self.join.join_type, - &self.join.join_constraint, - &self.join.schema, - &self.join.null_equality, - ) - } -} - -#[pymethods] -impl PyJoin { - /// Retrieves the left input `LogicalPlan` to this `Join` node - fn left(&self) -> PyResult { - Ok(self.join.left.as_ref().clone().into()) - } - - /// Retrieves the right input `LogicalPlan` to this `Join` node - fn right(&self) -> PyResult { - Ok(self.join.right.as_ref().clone().into()) - } - - /// Retrieves the right input `LogicalPlan` to this `Join` node - fn on(&self) -> PyResult> { - Ok(self - .join - .on - .iter() - .map(|(l, r)| (PyExpr::from(l.clone()), PyExpr::from(r.clone()))) - .collect()) - } - - /// Retrieves the filter `Option` of this `Join` node - fn filter(&self) -> PyResult> { - Ok(self.join.filter.clone().map(Into::into)) - } - - /// Retrieves the `JoinType` to this `Join` node - fn join_type(&self) -> PyResult { - Ok(self.join.join_type.into()) - } - - /// Retrieves the `JoinConstraint` to this `Join` node - fn join_constraint(&self) -> PyResult { - Ok(self.join.join_constraint.into()) - } - - /// Resulting Schema for this `Join` node instance - fn schema(&self) -> PyResult { - 
Ok(self.join.schema.as_ref().clone().into()) - } - - /// If null_equals_null is true, null == null else null != null - fn null_equals_null(&self) -> PyResult { - match self.join.null_equality { - NullEquality::NullEqualsNothing => Ok(false), - NullEquality::NullEqualsNull => Ok(true), - } - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Join({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Join".to_string()) - } -} - -impl LogicalNode for PyJoin { - fn inputs(&self) -> Vec { - vec![ - PyLogicalPlan::from((*self.join.left).clone()), - PyLogicalPlan::from((*self.join.right).clone()), - ] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/like.rs b/src/expr/like.rs deleted file mode 100644 index 417dc9182..000000000 --- a/src/expr/like.rs +++ /dev/null @@ -1,215 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::expr::Like; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "Like", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyLike { - like: Like, -} - -impl From for PyLike { - fn from(like: Like) -> PyLike { - PyLike { like } - } -} - -impl From for Like { - fn from(like: PyLike) -> Self { - like.like - } -} - -impl Display for PyLike { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Like - Negated: {:?} - Expr: {:?} - Pattern: {:?} - Escape_Char: {:?}", - &self.negated(), - &self.expr(), - &self.pattern(), - &self.escape_char() - ) - } -} - -#[pymethods] -impl PyLike { - fn negated(&self) -> PyResult { - Ok(self.like.negated) - } - - fn expr(&self) -> PyResult { - Ok((*self.like.expr).clone().into()) - } - - fn pattern(&self) -> PyResult { - Ok((*self.like.pattern).clone().into()) - } - - fn escape_char(&self) -> PyResult> { - Ok(self.like.escape_char) - } - - fn __repr__(&self) -> String { - format!("Like({self})") - } -} - -#[pyclass( - from_py_object, - frozen, - name = "ILike", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyILike { - like: Like, -} - -impl From for PyILike { - fn from(like: Like) -> PyILike { - PyILike { like } - } -} - -impl From for Like { - fn from(like: PyILike) -> Self { - like.like - } -} - -impl Display for PyILike { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "ILike - Negated: {:?} - Expr: {:?} - Pattern: {:?} - Escape_Char: {:?}", - &self.negated(), - &self.expr(), - &self.pattern(), - &self.escape_char() - ) - } -} - -#[pymethods] -impl PyILike { - fn negated(&self) -> PyResult { - Ok(self.like.negated) - } - - fn expr(&self) -> PyResult { - Ok((*self.like.expr).clone().into()) - } - - fn pattern(&self) -> PyResult { - Ok((*self.like.pattern).clone().into()) - } - - fn escape_char(&self) -> PyResult> { - 
Ok(self.like.escape_char) - } - - fn __repr__(&self) -> String { - format!("Like({self})") - } -} - -#[pyclass( - from_py_object, - frozen, - name = "SimilarTo", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySimilarTo { - like: Like, -} - -impl From for PySimilarTo { - fn from(like: Like) -> PySimilarTo { - PySimilarTo { like } - } -} - -impl From for Like { - fn from(like: PySimilarTo) -> Self { - like.like - } -} - -impl Display for PySimilarTo { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "SimilarTo - Negated: {:?} - Expr: {:?} - Pattern: {:?} - Escape_Char: {:?}", - &self.negated(), - &self.expr(), - &self.pattern(), - &self.escape_char() - ) - } -} - -#[pymethods] -impl PySimilarTo { - fn negated(&self) -> PyResult { - Ok(self.like.negated) - } - - fn expr(&self) -> PyResult { - Ok((*self.like.expr).clone().into()) - } - - fn pattern(&self) -> PyResult { - Ok((*self.like.pattern).clone().into()) - } - - fn escape_char(&self) -> PyResult> { - Ok(self.like.escape_char) - } - - fn __repr__(&self) -> String { - format!("Like({self})") - } -} diff --git a/src/expr/limit.rs b/src/expr/limit.rs deleted file mode 100644 index c04b8bfa8..000000000 --- a/src/expr/limit.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Limit; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Limit", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyLimit { - limit: Limit, -} - -impl From for PyLimit { - fn from(limit: Limit) -> PyLimit { - PyLimit { limit } - } -} - -impl From for Limit { - fn from(limit: PyLimit) -> Self { - limit.limit - } -} - -impl Display for PyLimit { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Limit - Skip: {:?} - Fetch: {:?} - Input: {:?}", - &self.limit.skip, &self.limit.fetch, &self.limit.input - ) - } -} - -#[pymethods] -impl PyLimit { - // NOTE: Upstream now has expressions for skip and fetch - // TODO: Do we still want to expose these? 
- // REF: https://github.com/apache/datafusion/pull/12836 - - // /// Retrieves the skip value for this `Limit` - // fn skip(&self) -> usize { - // self.limit.skip - // } - - // /// Retrieves the fetch value for this `Limit` - // fn fetch(&self) -> Option { - // self.limit.fetch - // } - - /// Retrieves the input `LogicalPlan` to this `Limit` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Limit` node instance - fn schema(&self) -> PyResult { - Ok(self.limit.input.schema().as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Limit({self})")) - } -} - -impl LogicalNode for PyLimit { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.limit.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/literal.rs b/src/expr/literal.rs deleted file mode 100644 index 9db0f594b..000000000 --- a/src/expr/literal.rs +++ /dev/null @@ -1,177 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::common::ScalarValue; -use datafusion::logical_expr::expr::FieldMetadata; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::errors::PyDataFusionError; - -#[pyclass( - from_py_object, - name = "Literal", - module = "datafusion.expr", - subclass, - frozen -)] -#[derive(Clone)] -pub struct PyLiteral { - pub value: ScalarValue, - pub metadata: Option, -} - -impl PyLiteral { - pub fn new_with_metadata(value: ScalarValue, metadata: Option) -> PyLiteral { - Self { value, metadata } - } -} - -impl From for ScalarValue { - fn from(lit: PyLiteral) -> ScalarValue { - lit.value - } -} - -impl From for PyLiteral { - fn from(value: ScalarValue) -> PyLiteral { - PyLiteral { - value, - metadata: None, - } - } -} - -macro_rules! extract_scalar_value { - ($self: expr, $variant: ident) => { - match &$self.value { - ScalarValue::$variant(value) => Ok(*value), - other => Err(unexpected_literal_value(other)), - } - }; -} - -#[pymethods] -impl PyLiteral { - /// Get the data type of this literal value - fn data_type(&self) -> String { - format!("{}", self.value.data_type()) - } - - pub fn value_f32(&self) -> PyResult> { - extract_scalar_value!(self, Float32) - } - - pub fn value_f64(&self) -> PyResult> { - extract_scalar_value!(self, Float64) - } - - pub fn value_decimal128(&self) -> PyResult<(Option, u8, i8)> { - match &self.value { - ScalarValue::Decimal128(value, precision, scale) => Ok((*value, *precision, *scale)), - other => Err(unexpected_literal_value(other)), - } - } - - pub fn value_i8(&self) -> PyResult> { - extract_scalar_value!(self, Int8) - } - - pub fn value_i16(&self) -> PyResult> { - extract_scalar_value!(self, Int16) - } - - pub fn value_i32(&self) -> PyResult> { - extract_scalar_value!(self, Int32) - } - - pub fn value_i64(&self) -> PyResult> { - extract_scalar_value!(self, Int64) - } - - pub fn value_u8(&self) -> PyResult> { - extract_scalar_value!(self, UInt8) - } - - pub fn value_u16(&self) -> PyResult> { - extract_scalar_value!(self, 
UInt16) - } - - pub fn value_u32(&self) -> PyResult> { - extract_scalar_value!(self, UInt32) - } - - pub fn value_u64(&self) -> PyResult> { - extract_scalar_value!(self, UInt64) - } - - pub fn value_date32(&self) -> PyResult> { - extract_scalar_value!(self, Date32) - } - - pub fn value_date64(&self) -> PyResult> { - extract_scalar_value!(self, Date64) - } - - pub fn value_time64(&self) -> PyResult> { - extract_scalar_value!(self, Time64Nanosecond) - } - - pub fn value_timestamp(&self) -> PyResult<(Option, Option)> { - match &self.value { - ScalarValue::TimestampNanosecond(iv, tz) - | ScalarValue::TimestampMicrosecond(iv, tz) - | ScalarValue::TimestampMillisecond(iv, tz) - | ScalarValue::TimestampSecond(iv, tz) => { - Ok((*iv, tz.as_ref().map(|s| s.as_ref().to_string()))) - } - other => Err(unexpected_literal_value(other)), - } - } - - pub fn value_bool(&self) -> PyResult> { - extract_scalar_value!(self, Boolean) - } - - pub fn value_string(&self) -> PyResult> { - match &self.value { - ScalarValue::Utf8(value) => Ok(value.clone()), - other => Err(unexpected_literal_value(other)), - } - } - - pub fn value_interval_day_time(&self) -> PyResult> { - match &self.value { - ScalarValue::IntervalDayTime(Some(iv)) => Ok(Some((iv.days, iv.milliseconds))), - ScalarValue::IntervalDayTime(None) => Ok(None), - other => Err(unexpected_literal_value(other)), - } - } - - #[allow(clippy::wrong_self_convention)] - fn into_type<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("{}", self.value)) - } -} - -fn unexpected_literal_value(value: &ScalarValue) -> PyErr { - PyDataFusionError::Common(format!("getValue() - Unexpected value: {value}")).into() -} diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs deleted file mode 100644 index 5aff70059..000000000 --- a/src/expr/logical_node.rs +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use pyo3::{Bound, PyAny, PyResult, Python}; - -use crate::sql::logical::PyLogicalPlan; - -/// Representation of a `LogicalNode` in the in overall `LogicalPlan` -/// any "node" shares these common traits in common. -pub trait LogicalNode { - /// The input plan to the current logical node instance. - fn inputs(&self) -> Vec; - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult>; -} diff --git a/src/expr/placeholder.rs b/src/expr/placeholder.rs deleted file mode 100644 index 6bd88321c..000000000 --- a/src/expr/placeholder.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::datatypes::Field; -use arrow::pyarrow::PyArrowType; -use datafusion::logical_expr::expr::Placeholder; -use pyo3::prelude::*; - -use crate::common::data_type::PyDataType; - -#[pyclass( - from_py_object, - frozen, - name = "Placeholder", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyPlaceholder { - placeholder: Placeholder, -} - -impl From for PyPlaceholder { - fn from(placeholder: Placeholder) -> Self { - PyPlaceholder { placeholder } - } -} - -#[pymethods] -impl PyPlaceholder { - fn id(&self) -> String { - self.placeholder.id.clone() - } - - fn data_type(&self) -> Option { - self.placeholder - .field - .as_ref() - .map(|f| f.data_type().clone().into()) - } - - fn field(&self) -> Option> { - self.placeholder - .field - .as_ref() - .map(|f| f.as_ref().clone().into()) - } -} diff --git a/src/expr/projection.rs b/src/expr/projection.rs deleted file mode 100644 index 456e06412..000000000 --- a/src/expr/projection.rs +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::Expr; -use datafusion::logical_expr::logical_plan::Projection; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::PyExpr; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Projection", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyProjection { - pub projection: Projection, -} - -impl PyProjection { - pub fn new(projection: Projection) -> Self { - Self { projection } - } -} - -impl From for PyProjection { - fn from(projection: Projection) -> PyProjection { - PyProjection { projection } - } -} - -impl From for Projection { - fn from(proj: PyProjection) -> Self { - proj.projection - } -} - -impl Display for PyProjection { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Projection - \nExpr(s): {:?} - \nInput: {:?} - \nProjected Schema: {:?}", - &self.projection.expr, &self.projection.input, &self.projection.schema, - ) - } -} - -#[pymethods] -impl PyProjection { - /// Retrieves the expressions for this `Projection` - fn projections(&self) -> PyResult> { - Ok(self - .projection - .expr - .iter() - .map(|e| PyExpr::from(e.clone())) - .collect()) - } - - /// Retrieves the input `LogicalPlan` to this `Projection` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Projection` node instance - fn schema(&self) -> PyResult { - 
Ok((*self.projection.schema).clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Projection({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Projection".to_string()) - } -} - -impl PyProjection { - /// Projection: Gets the names of the fields that should be projected - pub fn projected_expressions(local_expr: &PyExpr) -> Vec { - let mut projs: Vec = Vec::new(); - match &local_expr.expr { - Expr::Alias(alias) => { - let py_expr: PyExpr = PyExpr::from(*alias.expr.clone()); - projs.extend_from_slice(Self::projected_expressions(&py_expr).as_slice()); - } - _ => projs.push(local_expr.clone()), - } - projs - } -} - -impl LogicalNode for PyProjection { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.projection.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/recursive_query.rs b/src/expr/recursive_query.rs deleted file mode 100644 index e03137b80..000000000 --- a/src/expr/recursive_query.rs +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::RecursiveQuery; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "RecursiveQuery", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyRecursiveQuery { - query: RecursiveQuery, -} - -impl From for RecursiveQuery { - fn from(query: PyRecursiveQuery) -> Self { - query.query - } -} - -impl From for PyRecursiveQuery { - fn from(query: RecursiveQuery) -> PyRecursiveQuery { - PyRecursiveQuery { query } - } -} - -impl Display for PyRecursiveQuery { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "RecursiveQuery {name:?} is_distinct:={is_distinct}", - name = self.query.name, - is_distinct = self.query.is_distinct - ) - } -} - -#[pymethods] -impl PyRecursiveQuery { - #[new] - fn new( - name: String, - static_term: PyLogicalPlan, - recursive_term: PyLogicalPlan, - is_distinct: bool, - ) -> Self { - Self { - query: RecursiveQuery { - name, - static_term: static_term.plan(), - recursive_term: recursive_term.plan(), - is_distinct, - }, - } - } - - fn name(&self) -> PyResult { - Ok(self.query.name.clone()) - } - - fn static_term(&self) -> PyLogicalPlan { - PyLogicalPlan::from((*self.query.static_term).clone()) - } - - fn recursive_term(&self) -> PyLogicalPlan { - PyLogicalPlan::from((*self.query.recursive_term).clone()) - } - - fn is_distinct(&self) -> PyResult { - Ok(self.query.is_distinct) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("RecursiveQuery({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("RecursiveQuery".to_string()) - } -} - -impl LogicalNode for PyRecursiveQuery { - fn inputs(&self) -> Vec { - vec![ - PyLogicalPlan::from((*self.query.static_term).clone()), - PyLogicalPlan::from((*self.query.recursive_term).clone()), - ] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - 
self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs deleted file mode 100644 index be39b9978..000000000 --- a/src/expr/repartition.rs +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Repartition; -use datafusion::logical_expr::{Expr, Partitioning}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::PyExpr; -use super::logical_node::LogicalNode; -use crate::errors::py_type_err; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Repartition", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyRepartition { - repartition: Repartition, -} - -#[pyclass( - from_py_object, - frozen, - name = "Partitioning", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyPartitioning { - partitioning: Partitioning, -} - -impl From for Partitioning { - fn from(partitioning: PyPartitioning) -> Self { - partitioning.partitioning - } -} - -impl From for PyPartitioning { - fn from(partitioning: Partitioning) -> Self { - PyPartitioning { partitioning } - } -} - -impl From for Repartition { - fn from(repartition: PyRepartition) -> Self { - repartition.repartition - } -} - -impl From for PyRepartition { - fn from(repartition: Repartition) -> PyRepartition { - PyRepartition { repartition } - } -} - -impl Display for PyRepartition { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Repartition - input: {:?} - partitioning_scheme: {:?}", - &self.repartition.input, &self.repartition.partitioning_scheme, - ) - } -} - -#[pymethods] -impl PyRepartition { - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn partitioning_scheme(&self) -> PyResult { - Ok(PyPartitioning { - partitioning: self.repartition.partitioning_scheme.clone(), - }) - } - - fn distribute_list(&self) -> PyResult> { - match &self.repartition.partitioning_scheme { - Partitioning::DistributeBy(distribute_list) => Ok(distribute_list - .iter() - .map(|e| PyExpr::from(e.clone())) - .collect()), - _ => Err(py_type_err("unexpected repartition strategy")), - } - } - - fn 
distribute_columns(&self) -> PyResult { - match &self.repartition.partitioning_scheme { - Partitioning::DistributeBy(distribute_list) => Ok(distribute_list - .iter() - .map(|e| match &e { - Expr::Column(column) => column.name.clone(), - _ => panic!("Encountered a type other than Expr::Column"), - }) - .collect()), - _ => Err(py_type_err("unexpected repartition strategy")), - } - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Repartition({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Repartition".to_string()) - } -} - -impl LogicalNode for PyRepartition { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.repartition.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/scalar_subquery.rs b/src/expr/scalar_subquery.rs deleted file mode 100644 index c7852a4c4..000000000 --- a/src/expr/scalar_subquery.rs +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::Subquery; -use pyo3::prelude::*; - -use super::subquery::PySubquery; - -#[pyclass( - from_py_object, - frozen, - name = "ScalarSubquery", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyScalarSubquery { - subquery: Subquery, -} - -impl From for Subquery { - fn from(subquery: PyScalarSubquery) -> Self { - subquery.subquery - } -} - -impl From for PyScalarSubquery { - fn from(subquery: Subquery) -> PyScalarSubquery { - PyScalarSubquery { subquery } - } -} - -#[pymethods] -impl PyScalarSubquery { - fn subquery(&self) -> PySubquery { - self.subquery.clone().into() - } -} diff --git a/src/expr/scalar_variable.rs b/src/expr/scalar_variable.rs deleted file mode 100644 index 2d3bc4b76..000000000 --- a/src/expr/scalar_variable.rs +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use arrow::datatypes::FieldRef; -use pyo3::prelude::*; - -use crate::common::data_type::PyDataType; - -#[pyclass( - from_py_object, - frozen, - name = "ScalarVariable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyScalarVariable { - field: FieldRef, - variables: Vec, -} - -impl PyScalarVariable { - pub fn new(field: &FieldRef, variables: &[String]) -> Self { - Self { - field: field.to_owned(), - variables: variables.to_vec(), - } - } -} - -#[pymethods] -impl PyScalarVariable { - /// Get the data type - fn data_type(&self) -> PyResult { - Ok(self.field.data_type().clone().into()) - } - - fn variables(&self) -> PyResult> { - Ok(self.variables.clone()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("{}{:?}", self.field.data_type(), self.variables)) - } -} diff --git a/src/expr/set_comparison.rs b/src/expr/set_comparison.rs deleted file mode 100644 index 9f0c077e1..000000000 --- a/src/expr/set_comparison.rs +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::expr::SetComparison; -use pyo3::prelude::*; - -use super::subquery::PySubquery; -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "SetComparison", - module = "datafusion.set_comparison", - subclass -)] -#[derive(Clone)] -pub struct PySetComparison { - set_comparison: SetComparison, -} - -impl From for PySetComparison { - fn from(set_comparison: SetComparison) -> Self { - PySetComparison { set_comparison } - } -} - -#[pymethods] -impl PySetComparison { - fn expr(&self) -> PyExpr { - (*self.set_comparison.expr).clone().into() - } - - fn subquery(&self) -> PySubquery { - self.set_comparison.subquery.clone().into() - } - - fn op(&self) -> String { - format!("{}", self.set_comparison.op) - } - - fn quantifier(&self) -> String { - format!("{}", self.set_comparison.quantifier) - } -} diff --git a/src/expr/signature.rs b/src/expr/signature.rs deleted file mode 100644 index 35268e3a9..000000000 --- a/src/expr/signature.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::{TypeSignature, Volatility}; -use pyo3::prelude::*; - -#[allow(dead_code)] -#[pyclass( - from_py_object, - frozen, - name = "Signature", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySignature { - type_signature: TypeSignature, - volatility: Volatility, -} - -impl PySignature { - pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self { - Self { - type_signature, - volatility, - } - } -} - -#[pymethods] -impl PySignature {} diff --git a/src/expr/sort.rs b/src/expr/sort.rs deleted file mode 100644 index 7c1e654c5..000000000 --- a/src/expr/sort.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::common::DataFusionError; -use datafusion::logical_expr::logical_plan::Sort; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; -use crate::expr::sort_expr::PySortExpr; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Sort", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySort { - sort: Sort, -} - -impl From for PySort { - fn from(sort: Sort) -> PySort { - PySort { sort } - } -} - -impl TryFrom for Sort { - type Error = DataFusionError; - - fn try_from(agg: PySort) -> Result { - Ok(agg.sort) - } -} - -impl Display for PySort { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Sort - \nExpr(s): {:?} - \nInput: {:?} - \nSchema: {:?}", - &self.sort.expr, - self.sort.input, - self.sort.input.schema() - ) - } -} - -#[pymethods] -impl PySort { - /// Retrieves the sort expressions for this `Sort` - fn sort_exprs(&self) -> PyResult> { - Ok(self - .sort - .expr - .iter() - .map(|e| PySortExpr::from(e.clone())) - .collect()) - } - - fn get_fetch_val(&self) -> PyResult> { - Ok(self.sort.fetch) - } - - /// Retrieves the input `LogicalPlan` to this `Sort` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Sort` node instance - fn schema(&self) -> PyDFSchema { - self.sort.input.schema().as_ref().clone().into() - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Sort({self})")) - } -} - -impl LogicalNode for PySort { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.sort.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs deleted file mode 100644 index 3c3c86bc1..000000000 --- a/src/expr/sort_expr.rs +++ /dev/null @@ -1,98 +0,0 @@ -// 
Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::SortExpr; -use pyo3::prelude::*; - -use crate::expr::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "SortExpr", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySortExpr { - pub(crate) sort: SortExpr, -} - -impl From for SortExpr { - fn from(sort: PySortExpr) -> Self { - sort.sort - } -} - -impl From for PySortExpr { - fn from(sort: SortExpr) -> PySortExpr { - PySortExpr { sort } - } -} - -impl Display for PySortExpr { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Sort - Expr: {:?} - Asc: {:?} - NullsFirst: {:?}", - &self.sort.expr, &self.sort.asc, &self.sort.nulls_first - ) - } -} - -pub fn to_sort_expressions(order_by: Vec) -> Vec { - order_by.iter().map(|e| e.sort.clone()).collect() -} - -pub fn py_sort_expr_list(expr: &[SortExpr]) -> PyResult> { - Ok(expr.iter().map(|e| PySortExpr::from(e.clone())).collect()) -} - -#[pymethods] -impl PySortExpr { - #[new] - fn new(expr: PyExpr, asc: bool, nulls_first: bool) -> Self { - Self { - sort: SortExpr { - expr: expr.into(), - asc, - nulls_first, - }, - } - } - - fn expr(&self) -> PyResult 
{ - Ok(self.sort.expr.clone().into()) - } - - fn ascending(&self) -> PyResult { - Ok(self.sort.asc) - } - - fn nulls_first(&self) -> PyResult { - Ok(self.sort.nulls_first) - } - - fn __repr__(&self) -> String { - format!("{self}") - } -} diff --git a/src/expr/statement.rs b/src/expr/statement.rs deleted file mode 100644 index 5aa1e4e9c..000000000 --- a/src/expr/statement.rs +++ /dev/null @@ -1,558 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use arrow::datatypes::Field; -use arrow::pyarrow::PyArrowType; -use datafusion::logical_expr::{ - Deallocate, Execute, Prepare, ResetVariable, SetVariable, TransactionAccessMode, - TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, -}; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::PyExpr; -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "TransactionStart", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyTransactionStart { - transaction_start: TransactionStart, -} - -impl From for PyTransactionStart { - fn from(transaction_start: TransactionStart) -> PyTransactionStart { - PyTransactionStart { transaction_start } - } -} - -impl TryFrom for TransactionStart { - type Error = PyErr; - - fn try_from(py: PyTransactionStart) -> Result { - Ok(py.transaction_start) - } -} - -impl LogicalNode for PyTransactionStart { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "TransactionAccessMode", - module = "datafusion.expr" -)] -pub enum PyTransactionAccessMode { - ReadOnly, - ReadWrite, -} - -impl From for PyTransactionAccessMode { - fn from(access_mode: TransactionAccessMode) -> PyTransactionAccessMode { - match access_mode { - TransactionAccessMode::ReadOnly => PyTransactionAccessMode::ReadOnly, - TransactionAccessMode::ReadWrite => PyTransactionAccessMode::ReadWrite, - } - } -} - -impl TryFrom for TransactionAccessMode { - type Error = PyErr; - - fn try_from(py: PyTransactionAccessMode) -> Result { - match py { - PyTransactionAccessMode::ReadOnly => Ok(TransactionAccessMode::ReadOnly), - PyTransactionAccessMode::ReadWrite => Ok(TransactionAccessMode::ReadWrite), - } - 
} -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "TransactionIsolationLevel", - module = "datafusion.expr" -)] -pub enum PyTransactionIsolationLevel { - ReadUncommitted, - ReadCommitted, - RepeatableRead, - Serializable, - Snapshot, -} - -impl From for PyTransactionIsolationLevel { - fn from(isolation_level: TransactionIsolationLevel) -> PyTransactionIsolationLevel { - match isolation_level { - TransactionIsolationLevel::ReadUncommitted => { - PyTransactionIsolationLevel::ReadUncommitted - } - TransactionIsolationLevel::ReadCommitted => PyTransactionIsolationLevel::ReadCommitted, - TransactionIsolationLevel::RepeatableRead => { - PyTransactionIsolationLevel::RepeatableRead - } - TransactionIsolationLevel::Serializable => PyTransactionIsolationLevel::Serializable, - TransactionIsolationLevel::Snapshot => PyTransactionIsolationLevel::Snapshot, - } - } -} - -impl TryFrom for TransactionIsolationLevel { - type Error = PyErr; - - fn try_from(value: PyTransactionIsolationLevel) -> Result { - match value { - PyTransactionIsolationLevel::ReadUncommitted => { - Ok(TransactionIsolationLevel::ReadUncommitted) - } - PyTransactionIsolationLevel::ReadCommitted => { - Ok(TransactionIsolationLevel::ReadCommitted) - } - PyTransactionIsolationLevel::RepeatableRead => { - Ok(TransactionIsolationLevel::RepeatableRead) - } - PyTransactionIsolationLevel::Serializable => { - Ok(TransactionIsolationLevel::Serializable) - } - PyTransactionIsolationLevel::Snapshot => Ok(TransactionIsolationLevel::Snapshot), - } - } -} - -#[pymethods] -impl PyTransactionStart { - #[new] - pub fn new( - access_mode: PyTransactionAccessMode, - isolation_level: PyTransactionIsolationLevel, - ) -> PyResult { - let access_mode = access_mode.try_into()?; - let isolation_level = isolation_level.try_into()?; - Ok(PyTransactionStart { - transaction_start: TransactionStart { - access_mode, - isolation_level, - }, - }) - } - - pub fn 
access_mode(&self) -> PyResult { - Ok(self.transaction_start.access_mode.clone().into()) - } - - pub fn isolation_level(&self) -> PyResult { - Ok(self.transaction_start.isolation_level.clone().into()) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "TransactionEnd", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyTransactionEnd { - transaction_end: TransactionEnd, -} - -impl From for PyTransactionEnd { - fn from(transaction_end: TransactionEnd) -> PyTransactionEnd { - PyTransactionEnd { transaction_end } - } -} - -impl TryFrom for TransactionEnd { - type Error = PyErr; - - fn try_from(py: PyTransactionEnd) -> Result { - Ok(py.transaction_end) - } -} - -impl LogicalNode for PyTransactionEnd { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[pyclass( - from_py_object, - frozen, - eq, - eq_int, - name = "TransactionConclusion", - module = "datafusion.expr" -)] -pub enum PyTransactionConclusion { - Commit, - Rollback, -} - -impl From for PyTransactionConclusion { - fn from(value: TransactionConclusion) -> Self { - match value { - TransactionConclusion::Commit => PyTransactionConclusion::Commit, - TransactionConclusion::Rollback => PyTransactionConclusion::Rollback, - } - } -} - -impl TryFrom for TransactionConclusion { - type Error = PyErr; - - fn try_from(value: PyTransactionConclusion) -> Result { - match value { - PyTransactionConclusion::Commit => Ok(TransactionConclusion::Commit), - PyTransactionConclusion::Rollback => Ok(TransactionConclusion::Rollback), - } - } -} -#[pymethods] -impl PyTransactionEnd { - #[new] - pub fn new(conclusion: PyTransactionConclusion, chain: bool) -> PyResult { - let conclusion = conclusion.try_into()?; - Ok(PyTransactionEnd { - transaction_end: TransactionEnd { conclusion, chain }, - }) - } - - pub fn conclusion(&self) -> PyResult { - 
Ok(self.transaction_end.conclusion.clone().into()) - } - - pub fn chain(&self) -> bool { - self.transaction_end.chain - } -} - -#[pyclass( - from_py_object, - frozen, - name = "ResetVariable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyResetVariable { - reset_variable: ResetVariable, -} - -impl From for PyResetVariable { - fn from(reset_variable: ResetVariable) -> PyResetVariable { - PyResetVariable { reset_variable } - } -} - -impl TryFrom for ResetVariable { - type Error = PyErr; - - fn try_from(py: PyResetVariable) -> Result { - Ok(py.reset_variable) - } -} - -impl LogicalNode for PyResetVariable { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyResetVariable { - #[new] - pub fn new(variable: String) -> Self { - PyResetVariable { - reset_variable: ResetVariable { variable }, - } - } - - pub fn variable(&self) -> String { - self.reset_variable.variable.clone() - } -} - -#[pyclass( - from_py_object, - frozen, - name = "SetVariable", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySetVariable { - set_variable: SetVariable, -} - -impl From for PySetVariable { - fn from(set_variable: SetVariable) -> PySetVariable { - PySetVariable { set_variable } - } -} - -impl TryFrom for SetVariable { - type Error = PyErr; - - fn try_from(py: PySetVariable) -> Result { - Ok(py.set_variable) - } -} - -impl LogicalNode for PySetVariable { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PySetVariable { - #[new] - pub fn new(variable: String, value: String) -> Self { - PySetVariable { - set_variable: SetVariable { variable, value }, - } - } - - pub fn variable(&self) -> String { - self.set_variable.variable.clone() - } - - pub fn value(&self) -> String { - 
self.set_variable.value.clone() - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Prepare", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyPrepare { - prepare: Prepare, -} - -impl From for PyPrepare { - fn from(prepare: Prepare) -> PyPrepare { - PyPrepare { prepare } - } -} - -impl TryFrom for Prepare { - type Error = PyErr; - - fn try_from(py: PyPrepare) -> Result { - Ok(py.prepare) - } -} - -impl LogicalNode for PyPrepare { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.prepare.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyPrepare { - #[new] - pub fn new(name: String, fields: Vec>, input: PyLogicalPlan) -> Self { - let input = input.plan().clone(); - let fields = fields.into_iter().map(|field| Arc::new(field.0)).collect(); - PyPrepare { - prepare: Prepare { - name, - fields, - input, - }, - } - } - - pub fn name(&self) -> String { - self.prepare.name.clone() - } - - pub fn fields(&self) -> Vec> { - self.prepare - .fields - .clone() - .into_iter() - .map(|f| f.as_ref().clone().into()) - .collect() - } - - pub fn input(&self) -> PyLogicalPlan { - PyLogicalPlan { - plan: self.prepare.input.clone(), - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Execute", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyExecute { - execute: Execute, -} - -impl From for PyExecute { - fn from(execute: Execute) -> PyExecute { - PyExecute { execute } - } -} - -impl TryFrom for Execute { - type Error = PyErr; - - fn try_from(py: PyExecute) -> Result { - Ok(py.execute) - } -} - -impl LogicalNode for PyExecute { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyExecute { - #[new] - pub fn new(name: String, parameters: Vec) -> Self { - let parameters = parameters - 
.into_iter() - .map(|parameter| parameter.into()) - .collect(); - PyExecute { - execute: Execute { name, parameters }, - } - } - - pub fn name(&self) -> String { - self.execute.name.clone() - } - - pub fn parameters(&self) -> Vec { - self.execute - .parameters - .clone() - .into_iter() - .map(|t| t.into()) - .collect() - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Deallocate", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyDeallocate { - deallocate: Deallocate, -} - -impl From for PyDeallocate { - fn from(deallocate: Deallocate) -> PyDeallocate { - PyDeallocate { deallocate } - } -} - -impl TryFrom for Deallocate { - type Error = PyErr; - - fn try_from(py: PyDeallocate) -> Result { - Ok(py.deallocate) - } -} - -impl LogicalNode for PyDeallocate { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyDeallocate { - #[new] - pub fn new(name: String) -> Self { - PyDeallocate { - deallocate: Deallocate { name }, - } - } - - pub fn name(&self) -> String { - self.deallocate.name.clone() - } -} diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs deleted file mode 100644 index c6fa83db8..000000000 --- a/src/expr/subquery.rs +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::Subquery; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Subquery", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySubquery { - subquery: Subquery, -} - -impl From for Subquery { - fn from(subquery: PySubquery) -> Self { - subquery.subquery - } -} - -impl From for PySubquery { - fn from(subquery: Subquery) -> PySubquery { - PySubquery { subquery } - } -} - -impl Display for PySubquery { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Subquery - Subquery: {:?} - outer_ref_columns: {:?}", - self.subquery.subquery, self.subquery.outer_ref_columns, - ) - } -} - -#[pymethods] -impl PySubquery { - /// Retrieves the input `LogicalPlan` to this `Projection` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Subquery({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Subquery".to_string()) - } -} - -impl LogicalNode for PySubquery { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs deleted file mode 100644 index a6b09e842..000000000 --- a/src/expr/subquery_alias.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one 
-// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::SubqueryAlias; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "SubqueryAlias", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PySubqueryAlias { - subquery_alias: SubqueryAlias, -} - -impl From for SubqueryAlias { - fn from(subquery_alias: PySubqueryAlias) -> Self { - subquery_alias.subquery_alias - } -} - -impl From for PySubqueryAlias { - fn from(subquery_alias: SubqueryAlias) -> PySubqueryAlias { - PySubqueryAlias { subquery_alias } - } -} - -impl Display for PySubqueryAlias { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "SubqueryAlias - Inputs(s): {:?} - Alias: {:?} - Schema: {:?}", - self.subquery_alias.input, self.subquery_alias.alias, self.subquery_alias.schema, - ) - } -} - -#[pymethods] -impl PySubqueryAlias { - /// Retrieves the input `LogicalPlan` to this `Projection` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Projection` node instance - fn schema(&self) -> 
PyResult { - Ok((*self.subquery_alias.schema).clone().into()) - } - - fn alias(&self) -> PyResult { - Ok(self.subquery_alias.alias.to_string()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("SubqueryAlias({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("SubqueryAlias".to_string()) - } -} - -impl LogicalNode for PySubqueryAlias { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.subquery_alias.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs deleted file mode 100644 index 8ba7e4a69..000000000 --- a/src/expr/table_scan.rs +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::common::TableReference; -use datafusion::logical_expr::logical_plan::TableScan; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::PyExpr; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "TableScan", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyTableScan { - table_scan: TableScan, -} - -impl PyTableScan { - pub fn new(table_scan: TableScan) -> Self { - Self { table_scan } - } -} - -impl From for TableScan { - fn from(tbl_scan: PyTableScan) -> TableScan { - tbl_scan.table_scan - } -} - -impl From for PyTableScan { - fn from(table_scan: TableScan) -> PyTableScan { - PyTableScan { table_scan } - } -} - -impl Display for PyTableScan { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "TableScan\nTable Name: {} - Projections: {:?} - Projected Schema: {:?} - Filters: {:?}", - &self.table_scan.table_name, - &self.py_projections(), - &self.py_schema(), - &self.py_filters(), - ) - } -} - -#[pymethods] -impl PyTableScan { - /// Retrieves the name of the table represented by this `TableScan` instance - #[pyo3(name = "table_name")] - fn py_table_name(&self) -> PyResult { - Ok(format!("{}", self.table_scan.table_name)) - } - - #[pyo3(name = "fqn")] - fn fqn(&self) -> PyResult<(Option, Option, String)> { - let table_ref: TableReference = self.table_scan.table_name.clone(); - Ok(match table_ref { - TableReference::Bare { table } => (None, None, table.to_string()), - TableReference::Partial { schema, table } => { - (None, Some(schema.to_string()), table.to_string()) - } - TableReference::Full { - catalog, - schema, - table, - } => ( - Some(catalog.to_string()), - Some(schema.to_string()), - table.to_string(), - ), - }) - } - - /// The column indexes that should be. 
Note if this is empty then - /// all columns should be read by the `TableProvider`. This function - /// provides a Tuple of the (index, column_name) to make things simpler - /// for the calling code since often times the name is preferred to - /// the index which is a lower level abstraction. - #[pyo3(name = "projection")] - fn py_projections(&self) -> PyResult> { - match &self.table_scan.projection { - Some(indices) => { - let schema = self.table_scan.source.schema(); - Ok(indices - .iter() - .map(|i| (*i, schema.field(*i).name().to_string())) - .collect()) - } - None => Ok(vec![]), - } - } - - /// Resulting schema from the `TableScan` operation - #[pyo3(name = "schema")] - fn py_schema(&self) -> PyResult { - Ok((*self.table_scan.projected_schema).clone().into()) - } - - /// Certain `TableProvider` physical readers offer the capability to filter rows that - /// are read at read time. These `filters` are contained here. - #[pyo3(name = "filters")] - fn py_filters(&self) -> PyResult> { - Ok(self - .table_scan - .filters - .iter() - .map(|expr| PyExpr::from(expr.clone())) - .collect()) - } - - /// Optional number of rows that should be read at read time by the `TableProvider` - #[pyo3(name = "fetch")] - fn py_fetch(&self) -> PyResult> { - Ok(self.table_scan.fetch) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("TableScan({self})")) - } -} - -impl LogicalNode for PyTableScan { - fn inputs(&self) -> Vec { - // table scans are leaf nodes and do not have inputs - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/union.rs b/src/expr/union.rs deleted file mode 100644 index a3b9efe91..000000000 --- a/src/expr/union.rs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Union; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Union", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyUnion { - union_: Union, -} - -impl From for PyUnion { - fn from(union_: Union) -> PyUnion { - PyUnion { union_ } - } -} - -impl From for Union { - fn from(union_: PyUnion) -> Self { - union_.union_ - } -} - -impl Display for PyUnion { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Union - Inputs: {:?} - Schema: {:?}", - &self.union_.inputs, &self.union_.schema, - ) - } -} - -#[pymethods] -impl PyUnion { - /// Retrieves the input `LogicalPlan` to this `Union` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Union` node instance - fn schema(&self) -> PyResult { - Ok(self.union_.schema.as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Union({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Union".to_string()) - } -} - -impl LogicalNode for PyUnion { - fn inputs(&self) -> Vec { - self.union_ - .inputs - .iter() - .map(|x| x.as_ref().clone().into()) - .collect() - } - 
- fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs deleted file mode 100644 index 880d0a279..000000000 --- a/src/expr/unnest.rs +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::logical_plan::Unnest; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::common::df_schema::PyDFSchema; -use crate::expr::logical_node::LogicalNode; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Unnest", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyUnnest { - unnest_: Unnest, -} - -impl From for PyUnnest { - fn from(unnest_: Unnest) -> PyUnnest { - PyUnnest { unnest_ } - } -} - -impl From for Unnest { - fn from(unnest_: PyUnnest) -> Self { - unnest_.unnest_ - } -} - -impl Display for PyUnnest { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Unnest - Inputs: {:?} - Schema: {:?}", - &self.unnest_.input, &self.unnest_.schema, - ) - } -} - -#[pymethods] -impl PyUnnest { - /// Retrieves the input `LogicalPlan` to this `Unnest` node - fn input(&self) -> PyResult> { - Ok(Self::inputs(self)) - } - - /// Resulting Schema for this `Unnest` node instance - fn schema(&self) -> PyResult { - Ok(self.unnest_.schema.as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("Unnest({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("Unnest".to_string()) - } -} - -impl LogicalNode for PyUnnest { - fn inputs(&self) -> Vec { - vec![PyLogicalPlan::from((*self.unnest_.input).clone())] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/expr/unnest_expr.rs b/src/expr/unnest_expr.rs deleted file mode 100644 index 97feef1d1..000000000 --- a/src/expr/unnest_expr.rs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::logical_expr::expr::Unnest; -use pyo3::prelude::*; - -use super::PyExpr; - -#[pyclass( - from_py_object, - frozen, - name = "UnnestExpr", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyUnnestExpr { - unnest: Unnest, -} - -impl From for PyUnnestExpr { - fn from(unnest: Unnest) -> PyUnnestExpr { - PyUnnestExpr { unnest } - } -} - -impl From for Unnest { - fn from(unnest: PyUnnestExpr) -> Self { - unnest.unnest - } -} - -impl Display for PyUnnestExpr { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Unnest - Expr: {:?}", - &self.unnest.expr, - ) - } -} - -#[pymethods] -impl PyUnnestExpr { - /// Retrieves the expression that is being unnested - fn expr(&self) -> PyResult { - Ok((*self.unnest.expr).clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("UnnestExpr({self})")) - } - - fn __name__(&self) -> PyResult { - Ok("UnnestExpr".to_string()) - } -} diff --git a/src/expr/values.rs b/src/expr/values.rs deleted file mode 100644 index d40b0e7cf..000000000 --- a/src/expr/values.rs +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use datafusion::logical_expr::Values; -use pyo3::prelude::*; -use pyo3::{IntoPyObjectExt, PyErr, PyResult, Python, pyclass}; - -use super::PyExpr; -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Values", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyValues { - values: Values, -} - -impl From for PyValues { - fn from(values: Values) -> PyValues { - PyValues { values } - } -} - -impl TryFrom for Values { - type Error = PyErr; - - fn try_from(py: PyValues) -> Result { - Ok(py.values) - } -} - -impl LogicalNode for PyValues { - fn inputs(&self) -> Vec { - vec![] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} - -#[pymethods] -impl PyValues { - #[new] - pub fn new(schema: PyDFSchema, values: Vec>) -> PyResult { - let values = values - .into_iter() - .map(|row| row.into_iter().map(|expr| expr.into()).collect()) - .collect(); - Ok(PyValues { - values: Values { - schema: Arc::new(schema.into()), - values, - }, - }) - } - - pub fn schema(&self) -> PyResult { - Ok((*self.values.schema).clone().into()) - } - - pub fn values(&self) -> Vec> { - self.values - .values - .clone() - .into_iter() - .map(|row| row.into_iter().map(|expr| expr.into()).collect()) - 
.collect() - } -} diff --git a/src/expr/window.rs b/src/expr/window.rs deleted file mode 100644 index 92d909bfc..000000000 --- a/src/expr/window.rs +++ /dev/null @@ -1,307 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{self, Display, Formatter}; - -use datafusion::common::{DataFusionError, ScalarValue}; -use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::PyNotImplementedError; -use pyo3::prelude::*; - -use super::py_expr_list; -use crate::common::data_type::PyScalarValue; -use crate::common::df_schema::PyDFSchema; -use crate::errors::{PyDataFusionResult, py_type_err}; -use crate::expr::PyExpr; -use crate::expr::logical_node::LogicalNode; -use crate::expr::sort_expr::{PySortExpr, py_sort_expr_list}; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "WindowExpr", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyWindowExpr { - window: Window, -} - -#[pyclass( - from_py_object, - frozen, - name = "WindowFrame", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyWindowFrame { - window_frame: WindowFrame, -} 
- -impl From for WindowFrame { - fn from(window_frame: PyWindowFrame) -> Self { - window_frame.window_frame - } -} - -impl From for PyWindowFrame { - fn from(window_frame: WindowFrame) -> PyWindowFrame { - PyWindowFrame { window_frame } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "WindowFrameBound", - module = "datafusion.expr", - subclass -)] -#[derive(Clone)] -pub struct PyWindowFrameBound { - frame_bound: WindowFrameBound, -} - -impl From for Window { - fn from(window: PyWindowExpr) -> Window { - window.window - } -} - -impl From for PyWindowExpr { - fn from(window: Window) -> PyWindowExpr { - PyWindowExpr { window } - } -} - -impl From for PyWindowFrameBound { - fn from(frame_bound: WindowFrameBound) -> Self { - PyWindowFrameBound { frame_bound } - } -} - -impl Display for PyWindowExpr { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "Over\n - Window Expr: {:?} - Schema: {:?}", - &self.window.window_expr, &self.window.schema - ) - } -} - -impl Display for PyWindowFrame { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!( - f, - "OVER ({} BETWEEN {} AND {})", - self.window_frame.units, self.window_frame.start_bound, self.window_frame.end_bound - ) - } -} - -#[pymethods] -impl PyWindowExpr { - /// Returns the schema of the Window - pub fn schema(&self) -> PyResult { - Ok(self.window.schema.as_ref().clone().into()) - } - - /// Returns window expressions - pub fn get_window_expr(&self) -> PyResult> { - py_expr_list(&self.window.window_expr) - } - - /// Returns order by columns in a window function expression - pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { - match expr.expr.unalias() { - Expr::WindowFunction(boxed_window_fn) => { - py_sort_expr_list(&boxed_window_fn.params.order_by) - } - other => Err(not_window_function_err(other)), - } - } - - /// Return partition by columns in a window function expression - pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult> { - match expr.expr.unalias() { - 
Expr::WindowFunction(boxed_window_fn) => { - py_expr_list(&boxed_window_fn.params.partition_by) - } - other => Err(not_window_function_err(other)), - } - } - - /// Return input args for window function - pub fn get_args(&self, expr: PyExpr) -> PyResult> { - match expr.expr.unalias() { - Expr::WindowFunction(boxed_window_fn) => py_expr_list(&boxed_window_fn.params.args), - other => Err(not_window_function_err(other)), - } - } - - /// Return window function name - pub fn window_func_name(&self, expr: PyExpr) -> PyResult { - match expr.expr.unalias() { - Expr::WindowFunction(boxed_window_fn) => Ok(boxed_window_fn.fun.to_string()), - other => Err(not_window_function_err(other)), - } - } - - /// Returns a Pywindow frame for a given window function expression - pub fn get_frame(&self, expr: PyExpr) -> Option { - match expr.expr.unalias() { - Expr::WindowFunction(boxed_window_fn) => { - Some(boxed_window_fn.params.window_frame.into()) - } - _ => None, - } - } -} - -fn not_window_function_err(expr: Expr) -> PyErr { - py_type_err(format!( - "Provided {} Expr {:?} is not a WindowFunction type", - expr.variant_name(), - expr - )) -} - -#[pymethods] -impl PyWindowFrame { - #[new] - #[pyo3(signature=(unit, start_bound, end_bound))] - pub fn new( - unit: &str, - start_bound: Option, - end_bound: Option, - ) -> PyResult { - let units = unit.to_ascii_lowercase(); - let units = match units.as_str() { - "rows" => WindowFrameUnits::Rows, - "range" => WindowFrameUnits::Range, - "groups" => WindowFrameUnits::Groups, - _ => { - return Err(PyNotImplementedError::new_err(format!("{units:?}"))); - } - }; - let start_bound = match start_bound { - Some(start_bound) => WindowFrameBound::Preceding(start_bound.0), - None => match units { - WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), - WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), - WindowFrameUnits::Groups => { - return 
Err(PyNotImplementedError::new_err(format!("{units:?}"))); - } - }, - }; - let end_bound = match end_bound { - Some(end_bound) => WindowFrameBound::Following(end_bound.0), - None => match units { - WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), - WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), - WindowFrameUnits::Groups => { - return Err(PyNotImplementedError::new_err(format!("{units:?}"))); - } - }, - }; - Ok(PyWindowFrame { - window_frame: WindowFrame::new_bounds(units, start_bound, end_bound), - }) - } - - /// Returns the window frame units for the bounds - pub fn get_frame_units(&self) -> PyResult { - Ok(self.window_frame.units.to_string()) - } - /// Returns starting bound - pub fn get_lower_bound(&self) -> PyResult { - Ok(self.window_frame.start_bound.clone().into()) - } - /// Returns end bound - pub fn get_upper_bound(&self) -> PyResult { - Ok(self.window_frame.end_bound.clone().into()) - } - - /// Get a String representation of this window frame - fn __repr__(&self) -> String { - format!("{self}") - } -} - -#[pymethods] -impl PyWindowFrameBound { - /// Returns if the frame bound is current row - pub fn is_current_row(&self) -> bool { - matches!(self.frame_bound, WindowFrameBound::CurrentRow) - } - - /// Returns if the frame bound is preceding - pub fn is_preceding(&self) -> bool { - matches!(self.frame_bound, WindowFrameBound::Preceding(_)) - } - - /// Returns if the frame bound is following - pub fn is_following(&self) -> bool { - matches!(self.frame_bound, WindowFrameBound::Following(_)) - } - /// Returns the offset of the window frame - pub fn get_offset(&self) -> PyDataFusionResult> { - match &self.frame_bound { - WindowFrameBound::Preceding(val) | WindowFrameBound::Following(val) => match val { - x if x.is_null() => Ok(None), - ScalarValue::UInt64(v) => Ok(*v), - // The cast below is only safe because window bounds cannot be negative - ScalarValue::Int64(v) => Ok(v.map(|n| n as u64)), 
- ScalarValue::Utf8(Some(s)) => match s.parse::() { - Ok(s) => Ok(Some(s)), - Err(_e) => Err(DataFusionError::Plan(format!( - "Unable to parse u64 from Utf8 value '{s}'" - )) - .into()), - }, - ref x => { - Err(DataFusionError::Plan(format!("Unexpected window frame bound: {x}")).into()) - } - }, - WindowFrameBound::CurrentRow => Ok(None), - } - } - /// Returns if the frame bound is unbounded - pub fn is_unbounded(&self) -> PyResult { - match &self.frame_bound { - WindowFrameBound::Preceding(v) | WindowFrameBound::Following(v) => Ok(v.is_null()), - WindowFrameBound::CurrentRow => Ok(false), - } - } -} - -impl LogicalNode for PyWindowExpr { - fn inputs(&self) -> Vec { - vec![self.window.input.as_ref().clone().into()] - } - - fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - self.clone().into_bound_py_any(py) - } -} diff --git a/src/functions.rs b/src/functions.rs deleted file mode 100644 index c32134054..000000000 --- a/src/functions.rs +++ /dev/null @@ -1,1138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; - -use datafusion::common::{Column, ScalarValue, TableReference}; -use datafusion::execution::FunctionRegistry; -use datafusion::functions_aggregate::all_default_aggregate_functions; -use datafusion::functions_window::all_default_window_functions; -use datafusion::logical_expr::expr::{ - Alias, FieldMetadata, NullTreatment as DFNullTreatment, WindowFunction, WindowFunctionParams, -}; -use datafusion::logical_expr::{Expr, ExprFunctionExt, WindowFrame, WindowFunctionDefinition, lit}; -use datafusion::{functions, functions_aggregate, functions_window}; -use pyo3::prelude::*; -use pyo3::wrap_pyfunction; - -use crate::common::data_type::{NullTreatment, PyScalarValue}; -use crate::context::PySessionContext; -use crate::errors::{PyDataFusionError, PyDataFusionResult}; -use crate::expr::PyExpr; -use crate::expr::conditional_expr::PyCaseBuilder; -use crate::expr::sort_expr::{PySortExpr, to_sort_expressions}; -use crate::expr::window::PyWindowFrame; - -fn add_builder_fns_to_aggregate( - agg_fn: Expr, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - // Since ExprFuncBuilder::new() is private, we can guarantee initializing - // a builder with an `null_treatment` with option None - let mut builder = agg_fn.null_treatment(None); - - if let Some(order_by_cols) = order_by { - let order_by_cols = to_sort_expressions(order_by_cols); - builder = builder.order_by(order_by_cols); - } - - if let Some(true) = distinct { - builder = builder.distinct(); - } - - if let Some(filter) = filter { - builder = builder.filter(filter.expr); - } - - builder = builder.null_treatment(null_treatment.map(DFNullTreatment::from)); - - Ok(builder.build()?.into()) -} - -#[pyfunction] -fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { - datafusion::logical_expr::in_list( - expr.expr, - value.into_iter().map(|x| x.expr).collect::>(), - negated, - ) - .into() -} - -#[pyfunction] -fn 
make_array(exprs: Vec) -> PyExpr { - datafusion::functions_nested::expr_fn::make_array(exprs.into_iter().map(|x| x.into()).collect()) - .into() -} - -#[pyfunction] -fn array_concat(exprs: Vec) -> PyExpr { - let exprs = exprs.into_iter().map(|x| x.into()).collect(); - datafusion::functions_nested::expr_fn::array_concat(exprs).into() -} - -#[pyfunction] -fn array_cat(exprs: Vec) -> PyExpr { - array_concat(exprs) -} - -#[pyfunction] -#[pyo3(signature = (array, element, index=None))] -fn array_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { - let index = ScalarValue::Int64(index); - let index = Expr::Literal(index, None); - datafusion::functions_nested::expr_fn::array_position(array.into(), element.into(), index) - .into() -} - -#[pyfunction] -#[pyo3(signature = (array, begin, end, stride=None))] -fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { - datafusion::functions_nested::expr_fn::array_slice( - array.into(), - begin.into(), - end.into(), - stride.map(Into::into), - ) - .into() -} - -/// Computes a binary hash of the given data. type is the algorithm to use. -/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. -// #[pyfunction(value, method)] -#[pyfunction] -fn digest(value: PyExpr, method: PyExpr) -> PyExpr { - PyExpr { - expr: functions::expr_fn::digest(value.expr, method.expr), - } -} - -/// Concatenates the text representations of all the arguments. -/// NULL arguments are ignored. -#[pyfunction] -fn concat(args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(functions::string::expr_fn::concat(args).into()) -} - -/// Concatenates all but the first argument, with separators. -/// The first argument is used as the separator string, and should not be NULL. -/// Other NULL arguments are ignored. 
-#[pyfunction] -fn concat_ws(sep: String, args: Vec) -> PyResult { - let args = args.into_iter().map(|e| e.expr).collect::>(); - Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into()) -} - -#[pyfunction] -#[pyo3(signature = (values, regex, flags=None))] -fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { - Ok(functions::expr_fn::regexp_like(values.expr, regex.expr, flags.map(|x| x.expr)).into()) -} - -#[pyfunction] -#[pyo3(signature = (values, regex, flags=None))] -fn regexp_match(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { - Ok(functions::expr_fn::regexp_match(values.expr, regex.expr, flags.map(|x| x.expr)).into()) -} - -#[pyfunction] -#[pyo3(signature = (string, pattern, replacement, flags=None))] -/// Replaces substring(s) matching a POSIX regular expression. -fn regexp_replace( - string: PyExpr, - pattern: PyExpr, - replacement: PyExpr, - flags: Option, -) -> PyResult { - Ok(functions::expr_fn::regexp_replace( - string.into(), - pattern.into(), - replacement.into(), - flags.map(|x| x.expr), - ) - .into()) -} - -#[pyfunction] -#[pyo3(signature = (string, pattern, start, flags=None))] -/// Returns the number of matches found in the string. 
-fn regexp_count( - string: PyExpr, - pattern: PyExpr, - start: Option, - flags: Option, -) -> PyResult { - Ok(functions::expr_fn::regexp_count( - string.expr, - pattern.expr, - start.map(|x| x.expr), - flags.map(|x| x.expr), - ) - .into()) -} - -#[pyfunction] -#[pyo3(signature = (values, regex, start=None, n=None, flags=None, subexpr=None))] -/// Returns the position in a string where the specified occurrence of a regular expression is located -fn regexp_instr( - values: PyExpr, - regex: PyExpr, - start: Option, - n: Option, - flags: Option, - subexpr: Option, -) -> PyResult { - Ok(functions::expr_fn::regexp_instr( - values.into(), - regex.into(), - start.map(|x| x.expr).or(Some(lit(1))), - n.map(|x| x.expr).or(Some(lit(1))), - None, - flags.map(|x| x.expr).or(Some(lit(""))), - subexpr.map(|x| x.expr).or(Some(lit(0))), - ) - .into()) -} - -/// Creates a new Sort Expr -#[pyfunction] -fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { - Ok(PySortExpr::from(datafusion::logical_expr::expr::Sort { - expr: expr.expr, - asc, - nulls_first, - })) -} - -/// Creates a new Alias Expr -#[pyfunction] -#[pyo3(signature = (expr, name, metadata=None))] -fn alias(expr: PyExpr, name: &str, metadata: Option>) -> PyResult { - let relation: Option = None; - let metadata = metadata.map(|m| FieldMetadata::new(m.into_iter().collect())); - Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Alias( - Alias::new(expr.expr, relation, name).with_metadata(metadata), - ), - }) -} - -/// Create a column reference Expr -#[pyfunction] -fn col(name: &str) -> PyResult { - Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Column(Column::new_unqualified(name)), - }) -} - -/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. -#[pyfunction] -fn case(expr: PyExpr) -> PyResult { - Ok(PyCaseBuilder::new(Some(expr))) -} - -/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. 
-#[pyfunction] -fn when(when: PyExpr, then: PyExpr) -> PyResult { - Ok(PyCaseBuilder::new(None).when(when, then)) -} - -/// Helper function to find the appropriate window function. -/// -/// Search procedure: -/// 1) Search built in window functions, which are being deprecated. -/// 1) If a session context is provided: -/// 1) search User Defined Aggregate Functions (UDAFs) -/// 1) search registered window functions -/// 1) search registered aggregate functions -/// 1) If no function has been found, search default aggregate functions. -/// -/// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior. -fn find_window_fn( - name: &str, - ctx: Option, -) -> PyDataFusionResult { - if let Some(ctx) = ctx { - // search UDAFs - let udaf = ctx - .ctx - .udaf(name) - .map(WindowFunctionDefinition::AggregateUDF) - .ok(); - - if let Some(udaf) = udaf { - return Ok(udaf); - } - - let session_state = ctx.ctx.state(); - - // search registered window functions - let window_fn = session_state - .window_functions() - .get(name) - .map(|f| WindowFunctionDefinition::WindowUDF(f.clone())); - - if let Some(window_fn) = window_fn { - return Ok(window_fn); - } - - // search registered aggregate functions - let agg_fn = session_state - .aggregate_functions() - .get(name) - .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())); - - if let Some(agg_fn) = agg_fn { - return Ok(agg_fn); - } - } - - // search default aggregate functions - let agg_fn = all_default_aggregate_functions() - .iter() - .find(|v| v.name() == name || v.aliases().contains(&name.to_string())) - .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())); - - if let Some(agg_fn) = agg_fn { - return Ok(agg_fn); - } - - // search default window functions - let window_fn = all_default_window_functions() - .iter() - .find(|v| v.name() == name || v.aliases().contains(&name.to_string())) - .map(|f| WindowFunctionDefinition::WindowUDF(f.clone())); - - if let Some(window_fn) = 
window_fn { - return Ok(window_fn); - } - - Err(PyDataFusionError::Common(format!( - "window function `{name}` not found" - ))) -} - -/// Creates a new Window function expression -#[allow(clippy::too_many_arguments)] -#[pyfunction] -#[pyo3(signature = (name, args, partition_by=None, order_by=None, window_frame=None, filter=None, distinct=false, ctx=None))] -fn window( - name: &str, - args: Vec, - partition_by: Option>, - order_by: Option>, - window_frame: Option, - filter: Option, - distinct: bool, - ctx: Option, -) -> PyResult { - let fun = find_window_fn(name, ctx)?; - - let window_frame = window_frame - .map(|w| w.into()) - .unwrap_or(WindowFrame::new(order_by.as_ref().map(|v| !v.is_empty()))); - let filter = filter.map(|f| f.expr.into()); - - Ok(PyExpr { - expr: datafusion::logical_expr::Expr::WindowFunction(Box::new(WindowFunction { - fun, - params: WindowFunctionParams { - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.into()) - .collect::>(), - window_frame, - filter, - distinct, - null_treatment: None, - }, - })), - }) -} - -// Generates a [pyo3] wrapper for associated aggregate functions. -// All of the builder options are exposed to the python internal -// function and we rely on the wrappers to only use those that -// are appropriate. -macro_rules! 
aggregate_function { - ($NAME: ident) => { - aggregate_function!($NAME, expr); - }; - ($NAME: ident, $($arg:ident)*) => { - #[pyfunction] - #[pyo3(signature = ($($arg),*, distinct=None, filter=None, order_by=None, null_treatment=None))] - fn $NAME( - $($arg: PyExpr),*, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option - ) -> PyDataFusionResult { - let agg_fn = functions_aggregate::expr_fn::$NAME($($arg.into()),*); - - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) - } - }; -} - -/// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] -/// -/// These functions have explicit named arguments. -macro_rules! expr_fn { - ($FUNC: ident) => { - expr_fn!($FUNC, , stringify!($FUNC)); - }; - ($FUNC:ident, $($arg:ident)*) => { - expr_fn!($FUNC, $($arg)*, stringify!($FUNC)); - }; - ($FUNC: ident, $DOC: expr) => { - expr_fn!($FUNC, ,$DOC); - }; - ($FUNC: ident, $($arg:ident)*, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction] - fn $FUNC($($arg: PyExpr),*) -> PyExpr { - functions::expr_fn::$FUNC($($arg.into()),*).into() - } - }; -} -/// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] -/// -/// These functions take a single `Vec` argument using `pyo3(signature = (*args))`. -macro_rules! expr_fn_vec { - ($FUNC: ident) => { - expr_fn_vec!($FUNC, stringify!($FUNC)); - }; - ($FUNC: ident, $DOC: expr) => { - #[doc = $DOC] - #[pyfunction] - #[pyo3(signature = (*args))] - fn $FUNC(args: Vec) -> PyExpr { - let args = args.into_iter().map(|e| e.into()).collect::>(); - functions::expr_fn::$FUNC(args).into() - } - }; -} - -/// Generates a [pyo3] wrapper for [datafusion_functions_nested::expr_fn] -/// -/// These functions have explicit named arguments. -macro_rules! 
array_fn { - ($FUNC: ident) => { - array_fn!($FUNC, , stringify!($FUNC)); - }; - ($FUNC:ident, $($arg:ident)*) => { - array_fn!($FUNC, $($arg)*, stringify!($FUNC)); - }; - ($FUNC: ident, $DOC: expr) => { - array_fn!($FUNC, , $DOC); - }; - ($FUNC: ident, $($arg:ident)*, $DOC:expr) => { - #[doc = $DOC] - #[pyfunction] - fn $FUNC($($arg: PyExpr),*) -> PyExpr { - datafusion::functions_nested::expr_fn::$FUNC($($arg.into()),*).into() - } - }; -} - -expr_fn!(abs, num); -expr_fn!(acos, num); -expr_fn!(acosh, num); -expr_fn!( - ascii, - arg1, - "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character." -); -expr_fn!(asin, num); -expr_fn!(asinh, num); -expr_fn!(atan, num); -expr_fn!(atanh, num); -expr_fn!(atan2, y x); -expr_fn!( - bit_length, - arg, - "Returns number of bits in the string (8 times the octet_length)." -); -expr_fn_vec!( - btrim, - "Removes the longest string containing only characters in characters (a space by default) from the start and end of string." -); -expr_fn!(cbrt, num); -expr_fn!(ceil, num); -expr_fn!( - character_length, - string, - "Returns number of characters in the string." -); -expr_fn!(length, string); -expr_fn!(char_length, string); -expr_fn!(chr, arg, "Returns the character with the given code."); -expr_fn_vec!(coalesce); -expr_fn!(cos, num); -expr_fn!(cosh, num); -expr_fn!(cot, num); -expr_fn!(degrees, num); -expr_fn!(decode, input encoding); -expr_fn!(encode, input encoding); -expr_fn!(ends_with, string suffix, "Returns true if string ends with suffix."); -expr_fn!(exp, num); -expr_fn!(factorial, num); -expr_fn!(floor, num); -expr_fn!(gcd, x y); -expr_fn!( - initcap, - string, - "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters." 
-); -expr_fn!(isnan, num); -expr_fn!(iszero, num); -expr_fn!(levenshtein, string1 string2); -expr_fn!(lcm, x y); -expr_fn!(left, string n, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); -expr_fn!(ln, num); -expr_fn!(log, base num); -expr_fn!(log10, num); -expr_fn!(log2, num); -expr_fn!(lower, arg1, "Converts the string to all lower case"); -expr_fn_vec!( - lpad, - "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)." -); -expr_fn_vec!( - ltrim, - "Removes the longest string containing only characters in characters (a space by default) from the start of string." -); -expr_fn!( - md5, - input_arg, - "Computes the MD5 hash of the argument, with the result written in hexadecimal." -); -expr_fn!( - nanvl, - x y, - "Returns x if x is not NaN otherwise returns y." -); -expr_fn!( - nvl, - x y, - "Returns x if x is not NULL otherwise returns y." -); -expr_fn!(nullif, arg_1 arg_2); -expr_fn!( - octet_length, - args, - "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces." -); -expr_fn_vec!(overlay); -expr_fn!(pi); -expr_fn!(power, base exponent); -expr_fn!(radians, num); -expr_fn!(repeat, string n, "Repeats string the specified number of times."); -expr_fn!( - replace, - string from to, - "Replaces all occurrences in string of substring from with substring to." -); -expr_fn!( - reverse, - string, - "Reverses the order of the characters in the string." -); -expr_fn!(right, string n, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); -expr_fn_vec!(round); -expr_fn_vec!( - rpad, - "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated." 
-); -expr_fn_vec!( - rtrim, - "Removes the longest string containing only characters in characters (a space by default) from the end of string." -); -expr_fn!(sha224, input_arg1); -expr_fn!(sha256, input_arg1); -expr_fn!(sha384, input_arg1); -expr_fn!(sha512, input_arg1); -expr_fn!(signum, num); -expr_fn!(sin, num); -expr_fn!(sinh, num); -expr_fn!( - split_part, - string delimiter index, - "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." -); -expr_fn!(sqrt, num); -expr_fn!(starts_with, string prefix, "Returns true if string starts with prefix."); -expr_fn!(strpos, string substring, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); -expr_fn!(substr, string position); -expr_fn!(substr_index, string delimiter count); -expr_fn!(substring, string position length); -expr_fn!(find_in_set, string string_list); -expr_fn!(tan, num); -expr_fn!(tanh, num); -expr_fn!( - to_hex, - arg1, - "Converts the number to its equivalent hexadecimal representation." -); -expr_fn!(now); -expr_fn_vec!(to_date); -expr_fn_vec!(to_local_time); -expr_fn_vec!(to_time); -expr_fn_vec!(to_timestamp); -expr_fn_vec!(to_timestamp_millis); -expr_fn_vec!(to_timestamp_nanos); -expr_fn_vec!(to_timestamp_micros); -expr_fn_vec!(to_timestamp_seconds); -expr_fn_vec!(to_unixtime); -expr_fn!(current_date); -expr_fn!(current_time); -expr_fn!(date_part, part date); -expr_fn!(date_trunc, part date); -expr_fn!(date_bin, stride source origin); -expr_fn!(make_date, year month day); -expr_fn!(to_char, datetime format); - -expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. 
If from is longer than to, occurrences of the extra characters in from are deleted."); -expr_fn_vec!( - trim, - "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string." -); -expr_fn_vec!(trunc); -expr_fn!(upper, arg1, "Converts the string to all upper case."); -expr_fn!(uuid); -expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword -expr_fn_vec!(named_struct); -expr_fn!(from_unixtime, unixtime); -expr_fn!(arrow_typeof, arg_1); -expr_fn!(arrow_cast, arg_1 datatype); -expr_fn!(random); - -// Array Functions -array_fn!(array_append, array element); -array_fn!(array_to_string, array delimiter); -array_fn!(array_dims, array); -array_fn!(array_distinct, array); -array_fn!(array_element, array element); -array_fn!(array_empty, array); -array_fn!(array_length, array); -array_fn!(array_has, first_array second_array); -array_fn!(array_has_all, first_array second_array); -array_fn!(array_has_any, first_array second_array); -array_fn!(array_positions, array element); -array_fn!(array_ndims, array); -array_fn!(array_prepend, element array); -array_fn!(array_pop_back, array); -array_fn!(array_pop_front, array); -array_fn!(array_remove, array element); -array_fn!(array_remove_n, array element max); -array_fn!(array_remove_all, array element); -array_fn!(array_repeat, element count); -array_fn!(array_replace, array from to); -array_fn!(array_replace_n, array from to max); -array_fn!(array_replace_all, array from to); -array_fn!(array_sort, array desc null_first); -array_fn!(array_intersect, first_array second_array); -array_fn!(array_union, array1 array2); -array_fn!(array_except, first_array second_array); -array_fn!(array_resize, array size value); -array_fn!(cardinality, array); -array_fn!(flatten, array); -array_fn!(range, start stop step); - -aggregate_function!(array_agg); -aggregate_function!(max); -aggregate_function!(min); -aggregate_function!(avg); 
-aggregate_function!(sum); -aggregate_function!(bit_and); -aggregate_function!(bit_or); -aggregate_function!(bit_xor); -aggregate_function!(bool_and); -aggregate_function!(bool_or); -aggregate_function!(corr, y x); -aggregate_function!(count); -aggregate_function!(covar_samp, y x); -aggregate_function!(covar_pop, y x); -aggregate_function!(median); -aggregate_function!(regr_slope, y x); -aggregate_function!(regr_intercept, y x); -aggregate_function!(regr_count, y x); -aggregate_function!(regr_r2, y x); -aggregate_function!(regr_avgx, y x); -aggregate_function!(regr_avgy, y x); -aggregate_function!(regr_sxx, y x); -aggregate_function!(regr_syy, y x); -aggregate_function!(regr_sxy, y x); -aggregate_function!(stddev); -aggregate_function!(stddev_pop); -aggregate_function!(var_sample); -aggregate_function!(var_pop); -aggregate_function!(approx_distinct); -aggregate_function!(approx_median); - -// Code is commented out since grouping is not yet implemented -// https://github.com/apache/datafusion-python/issues/861 -// aggregate_function!(grouping); - -#[pyfunction] -#[pyo3(signature = (sort_expression, percentile, num_centroids=None, filter=None))] -pub fn approx_percentile_cont( - sort_expression: PySortExpr, - percentile: f64, - num_centroids: Option, // enforces optional arguments at the end, currently - filter: Option, -) -> PyDataFusionResult { - let agg_fn = functions_aggregate::expr_fn::approx_percentile_cont( - sort_expression.sort, - lit(percentile), - num_centroids.map(lit), - ); - - add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) -} - -#[pyfunction] -#[pyo3(signature = (sort_expression, weight, percentile, num_centroids=None, filter=None))] -pub fn approx_percentile_cont_with_weight( - sort_expression: PySortExpr, - weight: PyExpr, - percentile: f64, - num_centroids: Option, - filter: Option, -) -> PyDataFusionResult { - let agg_fn = functions_aggregate::expr_fn::approx_percentile_cont_with_weight( - sort_expression.sort, - weight.expr, - 
lit(percentile), - num_centroids.map(lit), - ); - - add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) -} - -// We handle last_value explicitly because the signature expects an order_by -// https://github.com/apache/datafusion/issues/12376 -#[pyfunction] -#[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] -pub fn last_value( - expr: PyExpr, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - // If we initialize the UDAF with order_by directly, then it gets over-written by the builder - let agg_fn = functions_aggregate::expr_fn::last_value(expr.expr, vec![]); - - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -} -// We handle first_value explicitly because the signature expects an order_by -// https://github.com/apache/datafusion/issues/12376 -#[pyfunction] -#[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] -pub fn first_value( - expr: PyExpr, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - // If we initialize the UDAF with order_by directly, then it gets over-written by the builder - let agg_fn = functions_aggregate::expr_fn::first_value(expr.expr, vec![]); - - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -} - -// nth_value requires a non-expr argument -#[pyfunction] -#[pyo3(signature = (expr, n, distinct=None, filter=None, order_by=None, null_treatment=None))] -pub fn nth_value( - expr: PyExpr, - n: i64, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(expr.expr, n, vec![]); - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -} - -// string_agg requires a non-expr argument -#[pyfunction] -#[pyo3(signature = (expr, 
delimiter, distinct=None, filter=None, order_by=None, null_treatment=None))] -pub fn string_agg( - expr: PyExpr, - delimiter: String, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - let agg_fn = datafusion::functions_aggregate::string_agg::string_agg(expr.expr, lit(delimiter)); - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) -} - -pub(crate) fn add_builder_fns_to_window( - window_fn: Expr, - partition_by: Option>, - window_frame: Option, - order_by: Option>, - null_treatment: Option, -) -> PyDataFusionResult { - let null_treatment = null_treatment.map(|n| n.into()); - let mut builder = window_fn.null_treatment(null_treatment); - - if let Some(partition_cols) = partition_by { - builder = builder.partition_by( - partition_cols - .into_iter() - .map(|col| col.clone().into()) - .collect(), - ); - } - - if let Some(order_by_cols) = order_by { - let order_by_cols = to_sort_expressions(order_by_cols); - builder = builder.order_by(order_by_cols); - } - - if let Some(window_frame) = window_frame { - builder = builder.window_frame(window_frame.into()); - } - - Ok(builder.build().map(|e| e.into())?) 
-} - -#[pyfunction] -#[pyo3(signature = (arg, shift_offset, default_value=None, partition_by=None, order_by=None))] -pub fn lead( - arg: PyExpr, - shift_offset: i64, - default_value: Option, - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let default_value = default_value.map(|v| v.into()); - let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (arg, shift_offset, default_value=None, partition_by=None, order_by=None))] -pub fn lag( - arg: PyExpr, - shift_offset: i64, - default_value: Option, - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let default_value = default_value.map(|v| v.into()); - let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (partition_by=None, order_by=None))] -pub fn row_number( - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::row_number(); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (partition_by=None, order_by=None))] -pub fn rank( - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::rank(); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (partition_by=None, order_by=None))] -pub fn dense_rank( - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::dense_rank(); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (partition_by=None, order_by=None))] -pub fn percent_rank( - 
partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::percent_rank(); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (partition_by=None, order_by=None))] -pub fn cume_dist( - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::cume_dist(); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -#[pyfunction] -#[pyo3(signature = (arg, partition_by=None, order_by=None))] -pub fn ntile( - arg: PyExpr, - partition_by: Option>, - order_by: Option>, -) -> PyDataFusionResult { - let window_fn = functions_window::expr_fn::ntile(arg.into()); - - add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) -} - -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(abs))?; - m.add_wrapped(wrap_pyfunction!(acos))?; - m.add_wrapped(wrap_pyfunction!(acosh))?; - m.add_wrapped(wrap_pyfunction!(approx_distinct))?; - m.add_wrapped(wrap_pyfunction!(alias))?; - m.add_wrapped(wrap_pyfunction!(approx_median))?; - m.add_wrapped(wrap_pyfunction!(approx_percentile_cont))?; - m.add_wrapped(wrap_pyfunction!(approx_percentile_cont_with_weight))?; - m.add_wrapped(wrap_pyfunction!(range))?; - m.add_wrapped(wrap_pyfunction!(array_agg))?; - m.add_wrapped(wrap_pyfunction!(arrow_typeof))?; - m.add_wrapped(wrap_pyfunction!(arrow_cast))?; - m.add_wrapped(wrap_pyfunction!(ascii))?; - m.add_wrapped(wrap_pyfunction!(asin))?; - m.add_wrapped(wrap_pyfunction!(asinh))?; - m.add_wrapped(wrap_pyfunction!(atan))?; - m.add_wrapped(wrap_pyfunction!(atanh))?; - m.add_wrapped(wrap_pyfunction!(atan2))?; - m.add_wrapped(wrap_pyfunction!(avg))?; - m.add_wrapped(wrap_pyfunction!(bit_length))?; - m.add_wrapped(wrap_pyfunction!(btrim))?; - m.add_wrapped(wrap_pyfunction!(cbrt))?; - m.add_wrapped(wrap_pyfunction!(ceil))?; - 
m.add_wrapped(wrap_pyfunction!(character_length))?; - m.add_wrapped(wrap_pyfunction!(chr))?; - m.add_wrapped(wrap_pyfunction!(char_length))?; - m.add_wrapped(wrap_pyfunction!(coalesce))?; - m.add_wrapped(wrap_pyfunction!(case))?; - m.add_wrapped(wrap_pyfunction!(when))?; - m.add_wrapped(wrap_pyfunction!(col))?; - m.add_wrapped(wrap_pyfunction!(concat_ws))?; - m.add_wrapped(wrap_pyfunction!(concat))?; - m.add_wrapped(wrap_pyfunction!(corr))?; - m.add_wrapped(wrap_pyfunction!(cos))?; - m.add_wrapped(wrap_pyfunction!(cosh))?; - m.add_wrapped(wrap_pyfunction!(cot))?; - m.add_wrapped(wrap_pyfunction!(count))?; - m.add_wrapped(wrap_pyfunction!(covar_pop))?; - m.add_wrapped(wrap_pyfunction!(covar_samp))?; - m.add_wrapped(wrap_pyfunction!(current_date))?; - m.add_wrapped(wrap_pyfunction!(current_time))?; - m.add_wrapped(wrap_pyfunction!(degrees))?; - m.add_wrapped(wrap_pyfunction!(date_bin))?; - m.add_wrapped(wrap_pyfunction!(date_part))?; - m.add_wrapped(wrap_pyfunction!(date_trunc))?; - m.add_wrapped(wrap_pyfunction!(make_date))?; - m.add_wrapped(wrap_pyfunction!(digest))?; - m.add_wrapped(wrap_pyfunction!(ends_with))?; - m.add_wrapped(wrap_pyfunction!(exp))?; - m.add_wrapped(wrap_pyfunction!(factorial))?; - m.add_wrapped(wrap_pyfunction!(floor))?; - m.add_wrapped(wrap_pyfunction!(from_unixtime))?; - m.add_wrapped(wrap_pyfunction!(gcd))?; - // m.add_wrapped(wrap_pyfunction!(grouping))?; - m.add_wrapped(wrap_pyfunction!(in_list))?; - m.add_wrapped(wrap_pyfunction!(initcap))?; - m.add_wrapped(wrap_pyfunction!(isnan))?; - m.add_wrapped(wrap_pyfunction!(iszero))?; - m.add_wrapped(wrap_pyfunction!(levenshtein))?; - m.add_wrapped(wrap_pyfunction!(lcm))?; - m.add_wrapped(wrap_pyfunction!(left))?; - m.add_wrapped(wrap_pyfunction!(length))?; - m.add_wrapped(wrap_pyfunction!(ln))?; - m.add_wrapped(wrap_pyfunction!(self::log))?; - m.add_wrapped(wrap_pyfunction!(log10))?; - m.add_wrapped(wrap_pyfunction!(log2))?; - m.add_wrapped(wrap_pyfunction!(lower))?; - 
m.add_wrapped(wrap_pyfunction!(lpad))?; - m.add_wrapped(wrap_pyfunction!(ltrim))?; - m.add_wrapped(wrap_pyfunction!(max))?; - m.add_wrapped(wrap_pyfunction!(make_array))?; - m.add_wrapped(wrap_pyfunction!(md5))?; - m.add_wrapped(wrap_pyfunction!(median))?; - m.add_wrapped(wrap_pyfunction!(min))?; - m.add_wrapped(wrap_pyfunction!(named_struct))?; - m.add_wrapped(wrap_pyfunction!(nanvl))?; - m.add_wrapped(wrap_pyfunction!(nvl))?; - m.add_wrapped(wrap_pyfunction!(now))?; - m.add_wrapped(wrap_pyfunction!(nullif))?; - m.add_wrapped(wrap_pyfunction!(octet_length))?; - m.add_wrapped(wrap_pyfunction!(order_by))?; - m.add_wrapped(wrap_pyfunction!(overlay))?; - m.add_wrapped(wrap_pyfunction!(pi))?; - m.add_wrapped(wrap_pyfunction!(power))?; - m.add_wrapped(wrap_pyfunction!(radians))?; - m.add_wrapped(wrap_pyfunction!(random))?; - m.add_wrapped(wrap_pyfunction!(regexp_count))?; - m.add_wrapped(wrap_pyfunction!(regexp_instr))?; - m.add_wrapped(wrap_pyfunction!(regexp_like))?; - m.add_wrapped(wrap_pyfunction!(regexp_match))?; - m.add_wrapped(wrap_pyfunction!(regexp_replace))?; - m.add_wrapped(wrap_pyfunction!(repeat))?; - m.add_wrapped(wrap_pyfunction!(replace))?; - m.add_wrapped(wrap_pyfunction!(reverse))?; - m.add_wrapped(wrap_pyfunction!(right))?; - m.add_wrapped(wrap_pyfunction!(round))?; - m.add_wrapped(wrap_pyfunction!(rpad))?; - m.add_wrapped(wrap_pyfunction!(rtrim))?; - m.add_wrapped(wrap_pyfunction!(sha224))?; - m.add_wrapped(wrap_pyfunction!(sha256))?; - m.add_wrapped(wrap_pyfunction!(sha384))?; - m.add_wrapped(wrap_pyfunction!(sha512))?; - m.add_wrapped(wrap_pyfunction!(signum))?; - m.add_wrapped(wrap_pyfunction!(sin))?; - m.add_wrapped(wrap_pyfunction!(sinh))?; - m.add_wrapped(wrap_pyfunction!(split_part))?; - m.add_wrapped(wrap_pyfunction!(sqrt))?; - m.add_wrapped(wrap_pyfunction!(starts_with))?; - m.add_wrapped(wrap_pyfunction!(stddev))?; - m.add_wrapped(wrap_pyfunction!(stddev_pop))?; - m.add_wrapped(wrap_pyfunction!(string_agg))?; - 
m.add_wrapped(wrap_pyfunction!(strpos))?; - m.add_wrapped(wrap_pyfunction!(r#struct))?; // Use raw identifier since struct is a keyword - m.add_wrapped(wrap_pyfunction!(substr))?; - m.add_wrapped(wrap_pyfunction!(substr_index))?; - m.add_wrapped(wrap_pyfunction!(substring))?; - m.add_wrapped(wrap_pyfunction!(find_in_set))?; - m.add_wrapped(wrap_pyfunction!(sum))?; - m.add_wrapped(wrap_pyfunction!(tan))?; - m.add_wrapped(wrap_pyfunction!(tanh))?; - m.add_wrapped(wrap_pyfunction!(to_hex))?; - m.add_wrapped(wrap_pyfunction!(to_char))?; - m.add_wrapped(wrap_pyfunction!(to_date))?; - m.add_wrapped(wrap_pyfunction!(to_local_time))?; - m.add_wrapped(wrap_pyfunction!(to_time))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp_micros))?; - m.add_wrapped(wrap_pyfunction!(to_timestamp_seconds))?; - m.add_wrapped(wrap_pyfunction!(to_unixtime))?; - m.add_wrapped(wrap_pyfunction!(translate))?; - m.add_wrapped(wrap_pyfunction!(trim))?; - m.add_wrapped(wrap_pyfunction!(trunc))?; - m.add_wrapped(wrap_pyfunction!(upper))?; - m.add_wrapped(wrap_pyfunction!(self::uuid))?; // Use self to avoid name collision - m.add_wrapped(wrap_pyfunction!(var_pop))?; - m.add_wrapped(wrap_pyfunction!(var_sample))?; - m.add_wrapped(wrap_pyfunction!(window))?; - m.add_wrapped(wrap_pyfunction!(regr_avgx))?; - m.add_wrapped(wrap_pyfunction!(regr_avgy))?; - m.add_wrapped(wrap_pyfunction!(regr_count))?; - m.add_wrapped(wrap_pyfunction!(regr_intercept))?; - m.add_wrapped(wrap_pyfunction!(regr_r2))?; - m.add_wrapped(wrap_pyfunction!(regr_slope))?; - m.add_wrapped(wrap_pyfunction!(regr_sxx))?; - m.add_wrapped(wrap_pyfunction!(regr_sxy))?; - m.add_wrapped(wrap_pyfunction!(regr_syy))?; - m.add_wrapped(wrap_pyfunction!(first_value))?; - m.add_wrapped(wrap_pyfunction!(last_value))?; - m.add_wrapped(wrap_pyfunction!(nth_value))?; - 
m.add_wrapped(wrap_pyfunction!(bit_and))?; - m.add_wrapped(wrap_pyfunction!(bit_or))?; - m.add_wrapped(wrap_pyfunction!(bit_xor))?; - m.add_wrapped(wrap_pyfunction!(bool_and))?; - m.add_wrapped(wrap_pyfunction!(bool_or))?; - - //Binary String Functions - m.add_wrapped(wrap_pyfunction!(encode))?; - m.add_wrapped(wrap_pyfunction!(decode))?; - - // Array Functions - m.add_wrapped(wrap_pyfunction!(array_append))?; - m.add_wrapped(wrap_pyfunction!(array_concat))?; - m.add_wrapped(wrap_pyfunction!(array_cat))?; - m.add_wrapped(wrap_pyfunction!(array_dims))?; - m.add_wrapped(wrap_pyfunction!(array_distinct))?; - m.add_wrapped(wrap_pyfunction!(array_element))?; - m.add_wrapped(wrap_pyfunction!(array_empty))?; - m.add_wrapped(wrap_pyfunction!(array_length))?; - m.add_wrapped(wrap_pyfunction!(array_has))?; - m.add_wrapped(wrap_pyfunction!(array_has_all))?; - m.add_wrapped(wrap_pyfunction!(array_has_any))?; - m.add_wrapped(wrap_pyfunction!(array_position))?; - m.add_wrapped(wrap_pyfunction!(array_positions))?; - m.add_wrapped(wrap_pyfunction!(array_to_string))?; - m.add_wrapped(wrap_pyfunction!(array_intersect))?; - m.add_wrapped(wrap_pyfunction!(array_union))?; - m.add_wrapped(wrap_pyfunction!(array_except))?; - m.add_wrapped(wrap_pyfunction!(array_resize))?; - m.add_wrapped(wrap_pyfunction!(array_ndims))?; - m.add_wrapped(wrap_pyfunction!(array_prepend))?; - m.add_wrapped(wrap_pyfunction!(array_pop_back))?; - m.add_wrapped(wrap_pyfunction!(array_pop_front))?; - m.add_wrapped(wrap_pyfunction!(array_remove))?; - m.add_wrapped(wrap_pyfunction!(array_remove_n))?; - m.add_wrapped(wrap_pyfunction!(array_remove_all))?; - m.add_wrapped(wrap_pyfunction!(array_repeat))?; - m.add_wrapped(wrap_pyfunction!(array_replace))?; - m.add_wrapped(wrap_pyfunction!(array_replace_n))?; - m.add_wrapped(wrap_pyfunction!(array_replace_all))?; - m.add_wrapped(wrap_pyfunction!(array_sort))?; - m.add_wrapped(wrap_pyfunction!(array_slice))?; - m.add_wrapped(wrap_pyfunction!(flatten))?; - 
m.add_wrapped(wrap_pyfunction!(cardinality))?; - - // Window Functions - m.add_wrapped(wrap_pyfunction!(lead))?; - m.add_wrapped(wrap_pyfunction!(lag))?; - m.add_wrapped(wrap_pyfunction!(rank))?; - m.add_wrapped(wrap_pyfunction!(row_number))?; - m.add_wrapped(wrap_pyfunction!(dense_rank))?; - m.add_wrapped(wrap_pyfunction!(percent_rank))?; - m.add_wrapped(wrap_pyfunction!(cume_dist))?; - m.add_wrapped(wrap_pyfunction!(ntile))?; - - Ok(()) -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 468243a3d..000000000 --- a/src/lib.rs +++ /dev/null @@ -1,147 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -// Re-export Apache Arrow DataFusion dependencies -pub use datafusion::{ - self, common as datafusion_common, logical_expr as datafusion_expr, optimizer, - sql as datafusion_sql, -}; -#[cfg(feature = "substrait")] -pub use datafusion_substrait; -#[cfg(feature = "mimalloc")] -use mimalloc::MiMalloc; -use pyo3::prelude::*; - -#[allow(clippy::borrow_deref_ref)] -pub mod catalog; -pub mod common; - -#[allow(clippy::borrow_deref_ref)] -mod config; -#[allow(clippy::borrow_deref_ref)] -pub mod context; -#[allow(clippy::borrow_deref_ref)] -pub mod dataframe; -mod dataset; -mod dataset_exec; -pub mod errors; -#[allow(clippy::borrow_deref_ref)] -pub mod expr; -#[allow(clippy::borrow_deref_ref)] -mod functions; -mod options; -pub mod physical_plan; -mod pyarrow_filter_expression; -pub mod pyarrow_util; -mod record_batch; -pub mod sql; -pub mod store; -pub mod table; -pub mod unparser; - -mod array; -#[cfg(feature = "substrait")] -pub mod substrait; -#[allow(clippy::borrow_deref_ref)] -mod udaf; -#[allow(clippy::borrow_deref_ref)] -mod udf; -pub mod udtf; -mod udwf; -pub mod utils; - -#[cfg(feature = "mimalloc")] -#[global_allocator] -static GLOBAL: MiMalloc = MiMalloc; - -// Used to define Tokio Runtime as a Python module attribute -pub(crate) struct TokioRuntime(tokio::runtime::Runtime); - -/// Low-level DataFusion internal package. -/// -/// The higher-level public API is defined in pure python files under the -/// datafusion directory. 
-#[pymodule] -fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { - // Initialize logging - pyo3_log::init(); - - // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - let catalog = PyModule::new(py, "catalog")?; - catalog::init_module(&catalog)?; - m.add_submodule(&catalog)?; - - // Register `common` as a submodule. Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ - let common = PyModule::new(py, "common")?; - common::init_module(&common)?; - m.add_submodule(&common)?; - - // Register `expr` as a submodule. Matching `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ - let expr = PyModule::new(py, "expr")?; - expr::init_module(&expr)?; - m.add_submodule(&expr)?; - - let unparser = PyModule::new(py, "unparser")?; - unparser::init_module(&unparser)?; - m.add_submodule(&unparser)?; - - // Register the functions as a submodule - let funcs = PyModule::new(py, "functions")?; - functions::init_module(&funcs)?; - m.add_submodule(&funcs)?; - - let store = PyModule::new(py, "object_store")?; - store::init_module(&store)?; - m.add_submodule(&store)?; - - let options = PyModule::new(py, "options")?; - options::init_module(&options)?; - m.add_submodule(&options)?; - - // Register substrait as a submodule - #[cfg(feature = "substrait")] - setup_substrait_module(py, &m)?; - - Ok(()) -} - -#[cfg(feature = "substrait")] -fn setup_substrait_module(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { - let substrait = PyModule::new(py, "substrait")?; - substrait::init_module(&substrait)?; - m.add_submodule(&substrait)?; - Ok(()) -} diff --git a/src/options.rs 
b/src/options.rs deleted file mode 100644 index 6b6037695..000000000 --- a/src/options.rs +++ /dev/null @@ -1,159 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::datatypes::{DataType, Schema}; -use arrow::pyarrow::PyArrowType; -use datafusion::prelude::CsvReadOptions; -use pyo3::prelude::{PyModule, PyModuleMethods}; -use pyo3::{Bound, PyResult, pyclass, pymethods}; - -use crate::context::parse_file_compression_type; -use crate::errors::PyDataFusionError; -use crate::expr::sort_expr::PySortExpr; - -/// Options for reading CSV files -#[pyclass(name = "CsvReadOptions", module = "datafusion.options", frozen)] -pub struct PyCsvReadOptions { - pub has_header: bool, - pub delimiter: u8, - pub quote: u8, - pub terminator: Option, - pub escape: Option, - pub comment: Option, - pub newlines_in_values: bool, - pub schema: Option>, - pub schema_infer_max_records: usize, - pub file_extension: String, - pub table_partition_cols: Vec<(String, PyArrowType)>, - pub file_compression_type: String, - pub file_sort_order: Vec>, - pub null_regex: Option, - pub truncated_rows: bool, -} - -#[pymethods] -impl PyCsvReadOptions { - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = ( - has_header=true, - 
delimiter=b',', - quote=b'"', - terminator=None, - escape=None, - comment=None, - newlines_in_values=false, - schema=None, - schema_infer_max_records=1000, - file_extension=".csv".to_string(), - table_partition_cols=vec![], - file_compression_type="".to_string(), - file_sort_order=vec![], - null_regex=None, - truncated_rows=false - ))] - #[new] - fn new( - has_header: bool, - delimiter: u8, - quote: u8, - terminator: Option, - escape: Option, - comment: Option, - newlines_in_values: bool, - schema: Option>, - schema_infer_max_records: usize, - file_extension: String, - table_partition_cols: Vec<(String, PyArrowType)>, - file_compression_type: String, - file_sort_order: Vec>, - null_regex: Option, - truncated_rows: bool, - ) -> Self { - Self { - has_header, - delimiter, - quote, - terminator, - escape, - comment, - newlines_in_values, - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - file_sort_order, - null_regex, - truncated_rows, - } - } -} - -impl<'a> TryFrom<&'a PyCsvReadOptions> for CsvReadOptions<'a> { - type Error = PyDataFusionError; - - fn try_from(value: &'a PyCsvReadOptions) -> Result, Self::Error> { - let partition_cols: Vec<(String, DataType)> = value - .table_partition_cols - .iter() - .map(|(name, dtype)| (name.clone(), dtype.0.clone())) - .collect(); - - let compression = parse_file_compression_type(Some(value.file_compression_type.clone()))?; - - let sort_order: Vec> = value - .file_sort_order - .iter() - .map(|inner| { - inner - .iter() - .map(|sort_expr| sort_expr.sort.clone()) - .collect() - }) - .collect(); - - // Explicit struct initialization to catch upstream changes - let mut options = CsvReadOptions { - has_header: value.has_header, - delimiter: value.delimiter, - quote: value.quote, - terminator: value.terminator, - escape: value.escape, - comment: value.comment, - newlines_in_values: value.newlines_in_values, - schema: None, // Will be set separately due to lifetime constraints - 
schema_infer_max_records: value.schema_infer_max_records, - file_extension: value.file_extension.as_str(), - table_partition_cols: partition_cols, - file_compression_type: compression, - file_sort_order: sort_order, - null_regex: value.null_regex.clone(), - truncated_rows: value.truncated_rows, - }; - - // Set schema separately to handle the lifetime - options.schema = value.schema.as_ref().map(|s| &s.0); - - Ok(options) - } -} - -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - - Ok(()) -} diff --git a/src/physical_plan.rs b/src/physical_plan.rs deleted file mode 100644 index 8674a8b55..000000000 --- a/src/physical_plan.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion::physical_plan::{ExecutionPlan, ExecutionPlanProperties, displayable}; -use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; -use prost::Message; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; -use pyo3::types::PyBytes; - -use crate::context::PySessionContext; -use crate::errors::PyDataFusionResult; - -#[pyclass( - from_py_object, - frozen, - name = "ExecutionPlan", - module = "datafusion", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyExecutionPlan { - pub plan: Arc, -} - -impl PyExecutionPlan { - /// creates a new PyPhysicalPlan - pub fn new(plan: Arc) -> Self { - Self { plan } - } -} - -#[pymethods] -impl PyExecutionPlan { - /// Get the inputs to this plan - pub fn children(&self) -> Vec { - self.plan - .children() - .iter() - .map(|&p| p.to_owned().into()) - .collect() - } - - pub fn display(&self) -> String { - let d = displayable(self.plan.as_ref()); - format!("{}", d.one_line()) - } - - pub fn display_indent(&self) -> String { - let d = displayable(self.plan.as_ref()); - format!("{}", d.indent(false)) - } - - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { - let codec = DefaultPhysicalExtensionCodec {}; - let proto = datafusion_proto::protobuf::PhysicalPlanNode::try_from_physical_plan( - self.plan.clone(), - &codec, - )?; - - let bytes = proto.encode_to_vec(); - Ok(PyBytes::new(py, &bytes)) - } - - #[staticmethod] - pub fn from_proto( - ctx: PySessionContext, - proto_msg: Bound<'_, PyBytes>, - ) -> PyDataFusionResult { - let bytes: &[u8] = proto_msg.extract().map_err(Into::::into)?; - let proto_plan = - datafusion_proto::protobuf::PhysicalPlanNode::decode(bytes).map_err(|e| { - PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {e}" - )) - })?; - - let codec = DefaultPhysicalExtensionCodec {}; - let plan = proto_plan.try_into_physical_plan(ctx.ctx.task_ctx().as_ref(), &codec)?; - Ok(Self::new(plan)) 
- } - - fn __repr__(&self) -> String { - self.display_indent() - } - - #[getter] - pub fn partition_count(&self) -> usize { - self.plan.output_partitioning().partition_count() - } -} - -impl From for Arc { - fn from(plan: PyExecutionPlan) -> Arc { - plan.plan.clone() - } -} - -impl From> for PyExecutionPlan { - fn from(plan: Arc) -> PyExecutionPlan { - PyExecutionPlan { plan: plan.clone() } - } -} diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs deleted file mode 100644 index e3b4b6009..000000000 --- a/src/pyarrow_filter_expression.rs +++ /dev/null @@ -1,180 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::convert::TryFrom; -use std::result::Result; - -use datafusion::common::{Column, ScalarValue}; -use datafusion::logical_expr::expr::InList; -use datafusion::logical_expr::{Between, BinaryExpr, Expr, Operator}; -/// Converts a Datafusion logical plan expression (Expr) into a PyArrow compute expression -use pyo3::{IntoPyObjectExt, prelude::*}; - -use crate::errors::{PyDataFusionError, PyDataFusionResult}; -use crate::pyarrow_util::scalar_to_pyarrow; - -#[derive(Debug)] -#[repr(transparent)] -pub(crate) struct PyArrowFilterExpression(Py); - -fn operator_to_py<'py>( - operator: &Operator, - op: &Bound<'py, PyModule>, -) -> PyDataFusionResult> { - let py_op: Bound<'_, PyAny> = match operator { - Operator::Eq => op.getattr("eq")?, - Operator::NotEq => op.getattr("ne")?, - Operator::Lt => op.getattr("lt")?, - Operator::LtEq => op.getattr("le")?, - Operator::Gt => op.getattr("gt")?, - Operator::GtEq => op.getattr("ge")?, - Operator::And => op.getattr("and_")?, - Operator::Or => op.getattr("or_")?, - _ => { - return Err(PyDataFusionError::Common(format!( - "Unsupported operator {operator:?}" - ))); - } - }; - Ok(py_op) -} - -fn extract_scalar_list<'py>( - exprs: &[Expr], - py: Python<'py>, -) -> PyDataFusionResult>> { - exprs - .iter() - .map(|expr| match expr { - // TODO: should we also leverage `ScalarValue::to_pyarrow` here? 
- Expr::Literal(v, _) => match v { - // The unwraps here are for infallible conversions - ScalarValue::Boolean(Some(b)) => Ok(b.into_bound_py_any(py)?), - ScalarValue::Int8(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::Int16(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::Int32(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::Int64(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::UInt8(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::UInt16(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::UInt32(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::UInt64(Some(i)) => Ok(i.into_bound_py_any(py)?), - ScalarValue::Float32(Some(f)) => Ok(f.into_bound_py_any(py)?), - ScalarValue::Float64(Some(f)) => Ok(f.into_bound_py_any(py)?), - ScalarValue::Utf8(Some(s)) => Ok(s.into_bound_py_any(py)?), - _ => Err(PyDataFusionError::Common(format!( - "PyArrow can't handle ScalarValue: {v:?}" - ))), - }, - _ => Err(PyDataFusionError::Common(format!( - "Only a list of Literals are supported got {expr:?}" - ))), - }) - .collect() -} - -impl PyArrowFilterExpression { - pub fn inner(&self) -> &Py { - &self.0 - } -} - -impl TryFrom<&Expr> for PyArrowFilterExpression { - type Error = PyDataFusionError; - - // Converts a Datafusion filter Expr into an expression string that can be evaluated by Python - // Note that pyarrow.compute.{field,scalar} are put into Python globals() when evaluated - // isin, is_null, and is_valid (~is_null) are methods of pyarrow.dataset.Expression - // https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow-dataset-expression - fn try_from(expr: &Expr) -> Result { - Python::attach(|py| { - let pc = Python::import(py, "pyarrow.compute")?; - let op_module = Python::import(py, "operator")?; - let pc_expr: PyDataFusionResult> = match expr { - Expr::Column(Column { name, .. 
}) => Ok(pc.getattr("field")?.call1((name,))?), - Expr::Literal(scalar, _) => Ok(scalar_to_pyarrow(scalar, py)?), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let operator = operator_to_py(op, &op_module)?; - let left = PyArrowFilterExpression::try_from(left.as_ref())?.0; - let right = PyArrowFilterExpression::try_from(right.as_ref())?.0; - Ok(operator.call1((left, right))?) - } - Expr::Not(expr) => { - let operator = op_module.getattr("invert")?; - let py_expr = PyArrowFilterExpression::try_from(expr.as_ref())?.0; - Ok(operator.call1((py_expr,))?) - } - Expr::IsNotNull(expr) => { - let py_expr = PyArrowFilterExpression::try_from(expr.as_ref())? - .0 - .into_bound(py); - Ok(py_expr.call_method0("is_valid")?) - } - Expr::IsNull(expr) => { - let expr = PyArrowFilterExpression::try_from(expr.as_ref())? - .0 - .into_bound(py); - - // https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow.dataset.Expression.is_null - // Whether floating-point NaNs are considered null. - let nan_is_null = false; - - let res = expr.call_method1("is_null", (nan_is_null,))?; - Ok(res) - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - let expr = PyArrowFilterExpression::try_from(expr.as_ref())?.0; - let low = PyArrowFilterExpression::try_from(low.as_ref())?.0; - let high = PyArrowFilterExpression::try_from(high.as_ref())?.0; - let and = op_module.getattr("and_")?; - let le = op_module.getattr("le")?; - let invert = op_module.getattr("invert")?; - - // scalar <= field() returns a boolean expression so we need to use and to combine these - let ret = and.call1(( - le.call1((low, expr.clone_ref(py)))?, - le.call1((expr, high))?, - ))?; - - Ok(if *negated { invert.call1((ret,))? } else { ret }) - } - Expr::InList(InList { - expr, - list, - negated, - }) => { - let expr = PyArrowFilterExpression::try_from(expr.as_ref())? 
- .0 - .into_bound(py); - let scalars = extract_scalar_list(list, py)?; - let ret = expr.call_method1("isin", (scalars,))?; - let invert = op_module.getattr("invert")?; - - Ok(if *negated { invert.call1((ret,))? } else { ret }) - } - _ => Err(PyDataFusionError::Common(format!( - "Unsupported Datafusion expression {expr:?}" - ))), - }; - Ok(PyArrowFilterExpression(pc_expr?.into())) - }) - } -} diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs deleted file mode 100644 index 1401a4938..000000000 --- a/src/pyarrow_util.rs +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Conversions between PyArrow and DataFusion types - -use std::sync::Arc; - -use arrow::array::{Array, ArrayData, ArrayRef, ListArray, make_array}; -use arrow::buffer::OffsetBuffer; -use arrow::datatypes::Field; -use arrow::pyarrow::{FromPyArrow, ToPyArrow}; -use datafusion::common::exec_err; -use datafusion::scalar::ScalarValue; -use pyo3::types::{PyAnyMethods, PyList}; -use pyo3::{Borrowed, Bound, FromPyObject, PyAny, PyErr, PyResult, Python}; - -use crate::common::data_type::PyScalarValue; -use crate::errors::PyDataFusionError; - -/// Helper function to turn an Array into a ScalarValue. 
If ``as_list_array`` is true, -/// the array will be turned into a ``ListArray``. Otherwise, we extract the first value -/// from the array. -fn array_to_scalar_value(array: ArrayRef, as_list_array: bool) -> PyResult { - if as_list_array { - let field = Arc::new(Field::new_list_field( - array.data_type().clone(), - array.nulls().is_some(), - )); - let offsets = OffsetBuffer::from_lengths(vec![array.len()]); - let list_array = ListArray::new(field, offsets, array, None); - Ok(PyScalarValue(ScalarValue::List(Arc::new(list_array)))) - } else { - let scalar = ScalarValue::try_from_array(&array, 0).map_err(PyDataFusionError::from)?; - Ok(PyScalarValue(scalar)) - } -} - -/// Helper function to take any Python object that contains an Arrow PyCapsule -/// interface and attempt to extract a scalar value from it. If `as_list_array` -/// is true, the array will be turned into a ``ListArray``. Otherwise, we extract -/// the first value from the array. -fn pyobj_extract_scalar_via_capsule( - value: &Bound<'_, PyAny>, - as_list_array: bool, -) -> PyResult { - let array_data = ArrayData::from_pyarrow_bound(value)?; - let array = make_array(array_data); - - array_to_scalar_value(array, as_list_array) -} - -impl FromPyArrow for PyScalarValue { - fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> PyResult { - let py = value.py(); - let pyarrow_mod = py.import("pyarrow"); - - // Is it a PyArrow object? - if let Ok(pa) = pyarrow_mod.as_ref() { - let scalar_type = pa.getattr("Scalar")?; - if value.is_instance(&scalar_type)? { - let typ = value.getattr("type")?; - - // construct pyarrow array from the python value and pyarrow type - let factory = py.import("pyarrow")?.getattr("array")?; - let args = PyList::new(py, [value])?; - let array = factory.call1((args, typ))?; - - return pyobj_extract_scalar_via_capsule(&array, false); - } - - let array_type = pa.getattr("Array")?; - if value.is_instance(&array_type)? 
{ - return pyobj_extract_scalar_via_capsule(value, true); - } - } - - // Is it a NanoArrow scalar? - if let Ok(na) = py.import("nanoarrow") { - let scalar_type = py.import("nanoarrow.array")?.getattr("Scalar")?; - if value.is_instance(&scalar_type)? { - return pyobj_extract_scalar_via_capsule(value, false); - } - let array_type = na.getattr("Array")?; - if value.is_instance(&array_type)? { - return pyobj_extract_scalar_via_capsule(value, true); - } - } - - // Is it a arro3 scalar? - if let Ok(arro3) = py.import("arro3").and_then(|arro3| arro3.getattr("core")) { - let scalar_type = arro3.getattr("Scalar")?; - if value.is_instance(&scalar_type)? { - return pyobj_extract_scalar_via_capsule(value, false); - } - let array_type = arro3.getattr("Array")?; - if value.is_instance(&array_type)? { - return pyobj_extract_scalar_via_capsule(value, true); - } - } - - // Does it have a PyCapsule interface but isn't one of our known libraries? - // If so do our "best guess". Try checking type name, and if that fails - // return a single value if the length is 1 and return a List value otherwise - if value.hasattr("__arrow_c_array__")? { - let type_name = value.get_type().repr()?; - if type_name.contains("Scalar")? { - return pyobj_extract_scalar_via_capsule(value, false); - } - if type_name.contains("Array")? { - return pyobj_extract_scalar_via_capsule(value, true); - } - - let array_data = ArrayData::from_pyarrow_bound(value)?; - let array = make_array(array_data); - - let as_array_list = array.len() != 1; - return array_to_scalar_value(array, as_array_list); - } - - // Last attempt - try to create a PyArrow scalar from a plain Python object - if let Ok(pa) = pyarrow_mod.as_ref() { - let scalar = pa.call_method1("scalar", (value,))?; - - PyScalarValue::from_pyarrow_bound(&scalar) - } else { - exec_err!("Unable to import scalar value").map_err(PyDataFusionError::from)? 
- } - } -} - -impl<'source> FromPyObject<'_, 'source> for PyScalarValue { - type Error = PyErr; - - fn extract(value: Borrowed<'_, 'source, PyAny>) -> Result { - Self::from_pyarrow_bound(&value) - } -} - -pub fn scalar_to_pyarrow<'py>( - scalar: &ScalarValue, - py: Python<'py>, -) -> PyResult> { - let array = scalar.to_array().map_err(PyDataFusionError::from)?; - // convert to pyarrow array using C data interface - let pyarray = array.to_data().to_pyarrow(py)?; - let pyscalar = pyarray.call_method1("__getitem__", (0,))?; - - Ok(pyscalar) -} diff --git a/src/record_batch.rs b/src/record_batch.rs deleted file mode 100644 index e8abc641b..000000000 --- a/src/record_batch.rs +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion::arrow::pyarrow::ToPyArrow; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::physical_plan::SendableRecordBatchStream; -use futures::StreamExt; -use pyo3::exceptions::{PyStopAsyncIteration, PyStopIteration}; -use pyo3::prelude::*; -use pyo3::{PyAny, PyResult, Python, pyclass, pymethods}; -use tokio::sync::Mutex; - -use crate::errors::PyDataFusionError; -use crate::utils::wait_for_future; - -#[pyclass(name = "RecordBatch", module = "datafusion", subclass, frozen)] -pub struct PyRecordBatch { - batch: RecordBatch, -} - -#[pymethods] -impl PyRecordBatch { - fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { - self.batch.to_pyarrow(py) - } -} - -impl From for PyRecordBatch { - fn from(batch: RecordBatch) -> Self { - Self { batch } - } -} - -#[pyclass(name = "RecordBatchStream", module = "datafusion", subclass, frozen)] -pub struct PyRecordBatchStream { - stream: Arc>, -} - -impl PyRecordBatchStream { - pub fn new(stream: SendableRecordBatchStream) -> Self { - Self { - stream: Arc::new(Mutex::new(stream)), - } - } -} - -#[pymethods] -impl PyRecordBatchStream { - fn next(&self, py: Python) -> PyResult { - let stream = self.stream.clone(); - wait_for_future(py, next_stream(stream, true))? - } - - fn __next__(&self, py: Python) -> PyResult { - self.next(py) - } - - fn __anext__<'py>(&'py self, py: Python<'py>) -> PyResult> { - let stream = self.stream.clone(); - pyo3_async_runtimes::tokio::future_into_py(py, next_stream(stream, false)) - } - - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } -} - -/// Polls the next batch from a `SendableRecordBatchStream`, converting the `Option>` form. 
-pub(crate) async fn poll_next_batch( - stream: &mut SendableRecordBatchStream, -) -> datafusion::error::Result> { - stream.next().await.transpose() -} - -async fn next_stream( - stream: Arc>, - sync: bool, -) -> PyResult { - let mut stream = stream.lock().await; - match poll_next_batch(&mut stream).await { - Ok(Some(batch)) => Ok(batch.into()), - Ok(None) => { - // Depending on whether the iteration is sync or not, we raise either a - // StopIteration or a StopAsyncIteration - if sync { - Err(PyStopIteration::new_err("stream exhausted")) - } else { - Err(PyStopAsyncIteration::new_err("stream exhausted")) - } - } - Err(e) => Err(PyDataFusionError::from(e))?, - } -} diff --git a/src/sql.rs b/src/sql.rs deleted file mode 100644 index dea9b566a..000000000 --- a/src/sql.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub mod exceptions; -pub mod logical; -pub(crate) mod util; diff --git a/src/sql/exceptions.rs b/src/sql/exceptions.rs deleted file mode 100644 index cfb02274b..000000000 --- a/src/sql/exceptions.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::{Debug, Display}; - -use pyo3::PyErr; - -pub fn py_type_err(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) -} - -pub fn py_runtime_err(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) -} - -pub fn py_value_err(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) -} diff --git a/src/sql/logical.rs b/src/sql/logical.rs deleted file mode 100644 index 631aa9b09..000000000 --- a/src/sql/logical.rs +++ /dev/null @@ -1,239 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use datafusion::logical_expr::{DdlStatement, LogicalPlan, Statement}; -use datafusion_proto::logical_plan::{AsLogicalPlan, DefaultLogicalExtensionCodec}; -use prost::Message; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; -use pyo3::types::PyBytes; - -use crate::context::PySessionContext; -use crate::errors::PyDataFusionResult; -use crate::expr::aggregate::PyAggregate; -use crate::expr::analyze::PyAnalyze; -use crate::expr::copy_to::PyCopyTo; -use crate::expr::create_catalog::PyCreateCatalog; -use crate::expr::create_catalog_schema::PyCreateCatalogSchema; -use crate::expr::create_external_table::PyCreateExternalTable; -use crate::expr::create_function::PyCreateFunction; -use crate::expr::create_index::PyCreateIndex; -use crate::expr::create_memory_table::PyCreateMemoryTable; -use crate::expr::create_view::PyCreateView; -use crate::expr::describe_table::PyDescribeTable; -use crate::expr::distinct::PyDistinct; -use crate::expr::dml::PyDmlStatement; -use crate::expr::drop_catalog_schema::PyDropCatalogSchema; -use crate::expr::drop_function::PyDropFunction; -use crate::expr::drop_table::PyDropTable; -use crate::expr::drop_view::PyDropView; -use crate::expr::empty_relation::PyEmptyRelation; -use crate::expr::explain::PyExplain; -use crate::expr::extension::PyExtension; -use crate::expr::filter::PyFilter; -use crate::expr::join::PyJoin; -use crate::expr::limit::PyLimit; -use crate::expr::logical_node::LogicalNode; -use crate::expr::projection::PyProjection; -use crate::expr::recursive_query::PyRecursiveQuery; -use crate::expr::repartition::PyRepartition; -use crate::expr::sort::PySort; -use crate::expr::statement::{ - PyDeallocate, PyExecute, PyPrepare, PyResetVariable, PySetVariable, PyTransactionEnd, - PyTransactionStart, -}; -use crate::expr::subquery::PySubquery; -use crate::expr::subquery_alias::PySubqueryAlias; -use 
crate::expr::table_scan::PyTableScan; -use crate::expr::union::PyUnion; -use crate::expr::unnest::PyUnnest; -use crate::expr::values::PyValues; -use crate::expr::window::PyWindowExpr; - -#[pyclass( - from_py_object, - frozen, - name = "LogicalPlan", - module = "datafusion", - subclass, - eq -)] -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct PyLogicalPlan { - pub(crate) plan: Arc, -} - -impl PyLogicalPlan { - /// creates a new PyLogicalPlan - pub fn new(plan: LogicalPlan) -> Self { - Self { - plan: Arc::new(plan), - } - } - - pub fn plan(&self) -> Arc { - self.plan.clone() - } -} - -#[pymethods] -impl PyLogicalPlan { - /// Return the specific logical operator - pub fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { - match self.plan.as_ref() { - LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), - LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), - LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), - LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), - LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), - LogicalPlan::Extension(plan) => PyExtension::from(plan.clone()).to_variant(py), - LogicalPlan::Filter(plan) => PyFilter::from(plan.clone()).to_variant(py), - LogicalPlan::Join(plan) => PyJoin::from(plan.clone()).to_variant(py), - LogicalPlan::Limit(plan) => PyLimit::from(plan.clone()).to_variant(py), - LogicalPlan::Projection(plan) => PyProjection::from(plan.clone()).to_variant(py), - LogicalPlan::Sort(plan) => PySort::from(plan.clone()).to_variant(py), - LogicalPlan::TableScan(plan) => PyTableScan::from(plan.clone()).to_variant(py), - LogicalPlan::Subquery(plan) => PySubquery::from(plan.clone()).to_variant(py), - LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), - LogicalPlan::Unnest(plan) => PyUnnest::from(plan.clone()).to_variant(py), - LogicalPlan::Window(plan) => 
PyWindowExpr::from(plan.clone()).to_variant(py), - LogicalPlan::Repartition(plan) => PyRepartition::from(plan.clone()).to_variant(py), - LogicalPlan::Union(plan) => PyUnion::from(plan.clone()).to_variant(py), - LogicalPlan::Statement(plan) => match plan { - Statement::TransactionStart(plan) => { - PyTransactionStart::from(plan.clone()).to_variant(py) - } - Statement::TransactionEnd(plan) => { - PyTransactionEnd::from(plan.clone()).to_variant(py) - } - Statement::SetVariable(plan) => PySetVariable::from(plan.clone()).to_variant(py), - Statement::ResetVariable(plan) => { - PyResetVariable::from(plan.clone()).to_variant(py) - } - Statement::Prepare(plan) => PyPrepare::from(plan.clone()).to_variant(py), - Statement::Execute(plan) => PyExecute::from(plan.clone()).to_variant(py), - Statement::Deallocate(plan) => PyDeallocate::from(plan.clone()).to_variant(py), - }, - LogicalPlan::Values(plan) => PyValues::from(plan.clone()).to_variant(py), - LogicalPlan::Dml(plan) => PyDmlStatement::from(plan.clone()).to_variant(py), - LogicalPlan::Ddl(plan) => match plan { - DdlStatement::CreateExternalTable(plan) => { - PyCreateExternalTable::from(plan.clone()).to_variant(py) - } - DdlStatement::CreateMemoryTable(plan) => { - PyCreateMemoryTable::from(plan.clone()).to_variant(py) - } - DdlStatement::CreateView(plan) => PyCreateView::from(plan.clone()).to_variant(py), - DdlStatement::CreateCatalogSchema(plan) => { - PyCreateCatalogSchema::from(plan.clone()).to_variant(py) - } - DdlStatement::CreateCatalog(plan) => { - PyCreateCatalog::from(plan.clone()).to_variant(py) - } - DdlStatement::CreateIndex(plan) => PyCreateIndex::from(plan.clone()).to_variant(py), - DdlStatement::DropTable(plan) => PyDropTable::from(plan.clone()).to_variant(py), - DdlStatement::DropView(plan) => PyDropView::from(plan.clone()).to_variant(py), - DdlStatement::DropCatalogSchema(plan) => { - PyDropCatalogSchema::from(plan.clone()).to_variant(py) - } - DdlStatement::CreateFunction(plan) => { - 
PyCreateFunction::from(plan.clone()).to_variant(py) - } - DdlStatement::DropFunction(plan) => { - PyDropFunction::from(plan.clone()).to_variant(py) - } - }, - LogicalPlan::Copy(plan) => PyCopyTo::from(plan.clone()).to_variant(py), - LogicalPlan::DescribeTable(plan) => PyDescribeTable::from(plan.clone()).to_variant(py), - LogicalPlan::RecursiveQuery(plan) => { - PyRecursiveQuery::from(plan.clone()).to_variant(py) - } - } - } - - /// Get the inputs to this plan - fn inputs(&self) -> Vec { - let mut inputs = vec![]; - for input in self.plan.inputs() { - inputs.push(input.to_owned().into()); - } - inputs - } - - fn __repr__(&self) -> PyResult { - Ok(format!("{:?}", self.plan)) - } - - fn display(&self) -> String { - format!("{}", self.plan.display()) - } - - fn display_indent(&self) -> String { - format!("{}", self.plan.display_indent()) - } - - fn display_indent_schema(&self) -> String { - format!("{}", self.plan.display_indent_schema()) - } - - fn display_graphviz(&self) -> String { - format!("{}", self.plan.display_graphviz()) - } - - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { - let codec = DefaultLogicalExtensionCodec {}; - let proto = - datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; - - let bytes = proto.encode_to_vec(); - Ok(PyBytes::new(py, &bytes)) - } - - #[staticmethod] - pub fn from_proto( - ctx: PySessionContext, - proto_msg: Bound<'_, PyBytes>, - ) -> PyDataFusionResult { - let bytes: &[u8] = proto_msg.extract().map_err(Into::::into)?; - let proto_plan = - datafusion_proto::protobuf::LogicalPlanNode::decode(bytes).map_err(|e| { - PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {e}" - )) - })?; - - let codec = DefaultLogicalExtensionCodec {}; - let plan = proto_plan.try_into_logical_plan(&ctx.ctx.task_ctx(), &codec)?; - Ok(Self::new(plan)) - } -} - -impl From for LogicalPlan { - fn from(logical_plan: PyLogicalPlan) -> LogicalPlan { - 
logical_plan.plan.as_ref().clone() - } -} - -impl From for PyLogicalPlan { - fn from(logical_plan: LogicalPlan) -> PyLogicalPlan { - PyLogicalPlan { - plan: Arc::new(logical_plan), - } - } -} diff --git a/src/sql/util.rs b/src/sql/util.rs deleted file mode 100644 index d1e8964f8..000000000 --- a/src/sql/util.rs +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::collections::HashMap; - -use datafusion::common::{DataFusionError, exec_err, plan_datafusion_err}; -use datafusion::logical_expr::sqlparser::dialect::dialect_from_str; -use datafusion::sql::sqlparser::dialect::Dialect; -use datafusion::sql::sqlparser::parser::Parser; -use datafusion::sql::sqlparser::tokenizer::{Token, Tokenizer}; - -fn tokens_from_replacements( - placeholder: &str, - replacements: &HashMap>, -) -> Option> { - if let Some(pattern) = placeholder.strip_prefix("$") { - replacements.get(pattern).cloned() - } else { - None - } -} - -fn get_tokens_for_string_replacement( - dialect: &dyn Dialect, - replacements: HashMap, -) -> Result>, DataFusionError> { - replacements - .into_iter() - .map(|(name, value)| { - let tokens = Tokenizer::new(dialect, &value) - .tokenize() - .map_err(|err| DataFusionError::External(err.into()))?; - Ok((name, tokens)) - }) - .collect() -} - -pub(crate) fn replace_placeholders_with_strings( - query: &str, - dialect: &str, - replacements: HashMap, -) -> Result { - let dialect = dialect_from_str(dialect) - .ok_or_else(|| plan_datafusion_err!("Unsupported SQL dialect: {dialect}."))?; - - let replacements = get_tokens_for_string_replacement(dialect.as_ref(), replacements)?; - - let tokens = Tokenizer::new(dialect.as_ref(), query) - .tokenize() - .map_err(|err| DataFusionError::External(err.into()))?; - - let replaced_tokens = tokens - .into_iter() - .flat_map(|token| { - if let Token::Placeholder(placeholder) = &token { - tokens_from_replacements(placeholder, &replacements).unwrap_or(vec![token]) - } else { - vec![token] - } - }) - .collect::>(); - - let statement = Parser::new(dialect.as_ref()) - .with_tokens(replaced_tokens) - .parse_statements() - .map_err(|err| DataFusionError::External(Box::new(err)))?; - - if statement.len() != 1 { - return exec_err!("placeholder replacement should return exactly one statement"); - } - - Ok(statement[0].to_string()) -} diff --git a/src/store.rs b/src/store.rs deleted file mode 
100644 index 8535e83b7..000000000 --- a/src/store.rs +++ /dev/null @@ -1,298 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use object_store::aws::{AmazonS3, AmazonS3Builder}; -use object_store::azure::{MicrosoftAzure, MicrosoftAzureBuilder}; -use object_store::gcp::{GoogleCloudStorage, GoogleCloudStorageBuilder}; -use object_store::http::{HttpBuilder, HttpStore}; -use object_store::local::LocalFileSystem; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use url::Url; - -#[derive(FromPyObject)] -pub enum StorageContexts { - AmazonS3(PyAmazonS3Context), - GoogleCloudStorage(PyGoogleCloudContext), - MicrosoftAzure(PyMicrosoftAzureContext), - LocalFileSystem(PyLocalFileSystemContext), - HTTP(PyHttpContext), -} - -#[pyclass( - from_py_object, - frozen, - name = "LocalFileSystem", - module = "datafusion.store", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyLocalFileSystemContext { - pub inner: Arc, -} - -#[pymethods] -impl PyLocalFileSystemContext { - #[pyo3(signature = (prefix=None))] - #[new] - fn new(prefix: Option) -> Self { - if let Some(prefix) = prefix { - Self { - inner: Arc::new( - LocalFileSystem::new_with_prefix(prefix) - .expect("Could not create local 
LocalFileSystem"), - ), - } - } else { - Self { - inner: Arc::new(LocalFileSystem::new()), - } - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "MicrosoftAzure", - module = "datafusion.store", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyMicrosoftAzureContext { - pub inner: Arc, - pub container_name: String, -} - -#[pymethods] -impl PyMicrosoftAzureContext { - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (container_name, account=None, access_key=None, bearer_token=None, client_id=None, client_secret=None, tenant_id=None, sas_query_pairs=None, use_emulator=None, allow_http=None, use_fabric_endpoint=None))] - #[new] - fn new( - container_name: String, - account: Option, - access_key: Option, - bearer_token: Option, - client_id: Option, - client_secret: Option, - tenant_id: Option, - sas_query_pairs: Option>, - use_emulator: Option, - allow_http: Option, - use_fabric_endpoint: Option, - ) -> Self { - let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(&container_name); - - if let Some(account) = account { - builder = builder.with_account(account); - } - - if let Some(access_key) = access_key { - builder = builder.with_access_key(access_key); - } - - if let Some(bearer_token) = bearer_token { - builder = builder.with_bearer_token_authorization(bearer_token); - } - - match (client_id, client_secret, tenant_id) { - (Some(client_id), Some(client_secret), Some(tenant_id)) => { - builder = - builder.with_client_secret_authorization(client_id, client_secret, tenant_id); - } - (None, None, None) => {} - _ => { - panic!("client_id, client_secret, tenat_id must be all set or all None"); - } - } - - if let Some(sas_query_pairs) = sas_query_pairs { - builder = builder.with_sas_authorization(sas_query_pairs); - } - - if let Some(use_emulator) = use_emulator { - builder = builder.with_use_emulator(use_emulator); - } - - if let Some(allow_http) = allow_http { - builder = builder.with_allow_http(allow_http); - } - - if let 
Some(use_fabric_endpoint) = use_fabric_endpoint { - builder = builder.with_use_fabric_endpoint(use_fabric_endpoint); - } - - Self { - inner: Arc::new( - builder - .build() - .expect("Could not create Azure Storage context"), //TODO: change these to PyErr - ), - container_name, - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "GoogleCloud", - module = "datafusion.store", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyGoogleCloudContext { - pub inner: Arc, - pub bucket_name: String, -} - -#[pymethods] -impl PyGoogleCloudContext { - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (bucket_name, service_account_path=None))] - #[new] - fn new(bucket_name: String, service_account_path: Option) -> Self { - let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(&bucket_name); - - if let Some(credential_path) = service_account_path { - builder = builder.with_service_account_path(credential_path); - } - - Self { - inner: Arc::new( - builder - .build() - .expect("Could not create Google Cloud Storage"), - ), - bucket_name, - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "AmazonS3", - module = "datafusion.store", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyAmazonS3Context { - pub inner: Arc, - pub bucket_name: String, -} - -#[pymethods] -impl PyAmazonS3Context { - #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (bucket_name, region=None, access_key_id=None, secret_access_key=None, session_token=None, endpoint=None, allow_http=false, imdsv1_fallback=false))] - #[new] - fn new( - bucket_name: String, - region: Option, - access_key_id: Option, - secret_access_key: Option, - session_token: Option, - endpoint: Option, - //retry_config: RetryConfig, - allow_http: bool, - imdsv1_fallback: bool, - ) -> Self { - // start w/ the options that come directly from the environment - let mut builder = AmazonS3Builder::from_env(); - - if let Some(region) = region { - builder = builder.with_region(region); - } - - if 
let Some(access_key_id) = access_key_id { - builder = builder.with_access_key_id(access_key_id); - }; - - if let Some(secret_access_key) = secret_access_key { - builder = builder.with_secret_access_key(secret_access_key); - }; - - if let Some(session_token) = session_token { - builder = builder.with_token(session_token); - } - - if let Some(endpoint) = endpoint { - builder = builder.with_endpoint(endpoint); - }; - - if imdsv1_fallback { - builder = builder.with_imdsv1_fallback(); - }; - - let store = builder - .with_bucket_name(bucket_name.clone()) - //.with_retry_config(retry_config) #TODO: add later - .with_allow_http(allow_http) - .build() - .expect("failed to build AmazonS3"); - - Self { - inner: Arc::new(store), - bucket_name, - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Http", - module = "datafusion.store", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyHttpContext { - pub url: String, - pub store: Arc, -} - -#[pymethods] -impl PyHttpContext { - #[new] - fn new(url: String) -> PyResult { - let store = match Url::parse(url.as_str()) { - Ok(url) => HttpBuilder::new() - .with_url(url.origin().ascii_serialization()) - .build(), - Err(_) => HttpBuilder::new().build(), - } - .map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string())))?; - - Ok(Self { - url, - store: Arc::new(store), - }) - } -} - -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} diff --git a/src/substrait.rs b/src/substrait.rs deleted file mode 100644 index c2f112520..000000000 --- a/src/substrait.rs +++ /dev/null @@ -1,195 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion_substrait::logical_plan::{consumer, producer}; -use datafusion_substrait::serializer; -use datafusion_substrait::substrait::proto::Plan; -use prost::Message; -use pyo3::prelude::*; -use pyo3::types::PyBytes; - -use crate::context::PySessionContext; -use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err}; -use crate::sql::logical::PyLogicalPlan; -use crate::utils::wait_for_future; - -#[pyclass( - from_py_object, - frozen, - name = "Plan", - module = "datafusion.substrait", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyPlan { - pub plan: Plan, -} - -#[pymethods] -impl PyPlan { - fn encode(&self, py: Python) -> PyResult> { - let mut proto_bytes = Vec::::new(); - self.plan - .encode(&mut proto_bytes) - .map_err(PyDataFusionError::EncodeError)?; - Ok(PyBytes::new(py, &proto_bytes).into()) - } - - /// Get the JSON representation of the substrait plan - fn to_json(&self) -> PyDataFusionResult { - let json = serde_json::to_string_pretty(&self.plan).map_err(to_datafusion_err)?; - Ok(json) - } - - /// Parse a Substrait Plan from its JSON representation - #[staticmethod] - fn from_json(json: &str) -> PyDataFusionResult { - let plan: Plan = serde_json::from_str(json).map_err(to_datafusion_err)?; - Ok(PyPlan { plan }) - } -} - -impl From for Plan { - fn from(plan: PyPlan) -> Plan { - plan.plan - } -} - -impl From for PyPlan { - fn 
from(plan: Plan) -> PyPlan { - PyPlan { plan } - } -} - -/// A PySubstraitSerializer is a representation of a Serializer that is capable of both serializing -/// a `LogicalPlan` instance to Substrait Protobuf bytes and also deserialize Substrait Protobuf bytes -/// to a valid `LogicalPlan` instance. -#[pyclass( - from_py_object, - frozen, - name = "Serde", - module = "datafusion.substrait", - subclass -)] -#[derive(Debug, Clone)] -pub struct PySubstraitSerializer; - -#[pymethods] -impl PySubstraitSerializer { - #[staticmethod] - pub fn serialize( - sql: &str, - ctx: PySessionContext, - path: &str, - py: Python, - ) -> PyDataFusionResult<()> { - wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))??; - Ok(()) - } - - #[staticmethod] - pub fn serialize_to_plan( - sql: &str, - ctx: PySessionContext, - py: Python, - ) -> PyDataFusionResult { - PySubstraitSerializer::serialize_bytes(sql, ctx, py).and_then(|proto_bytes| { - let proto_bytes = proto_bytes.bind(py).cast::().unwrap(); - PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) - }) - } - - #[staticmethod] - pub fn serialize_bytes( - sql: &str, - ctx: PySessionContext, - py: Python, - ) -> PyDataFusionResult> { - let proto_bytes: Vec = - wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))??; - Ok(PyBytes::new(py, &proto_bytes).into()) - } - - #[staticmethod] - pub fn deserialize(path: &str, py: Python) -> PyDataFusionResult { - let plan = wait_for_future(py, serializer::deserialize(path))??; - Ok(PyPlan { plan: *plan }) - } - - #[staticmethod] - pub fn deserialize_bytes(proto_bytes: Vec, py: Python) -> PyDataFusionResult { - let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))??; - Ok(PyPlan { plan: *plan }) - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Producer", - module = "datafusion.substrait", - subclass -)] -#[derive(Debug, Clone)] -pub struct PySubstraitProducer; - -#[pymethods] -impl PySubstraitProducer { - /// Convert 
DataFusion LogicalPlan to Substrait Plan - #[staticmethod] - pub fn to_substrait_plan(plan: PyLogicalPlan, ctx: &PySessionContext) -> PyResult { - let session_state = ctx.ctx.state(); - match producer::to_substrait_plan(&plan.plan, &session_state) { - Ok(plan) => Ok(PyPlan { plan: *plan }), - Err(e) => Err(py_datafusion_err(e)), - } - } -} - -#[pyclass( - from_py_object, - frozen, - name = "Consumer", - module = "datafusion.substrait", - subclass -)] -#[derive(Debug, Clone)] -pub struct PySubstraitConsumer; - -#[pymethods] -impl PySubstraitConsumer { - /// Convert Substrait Plan to DataFusion DataFrame - #[staticmethod] - pub fn from_substrait_plan( - ctx: &PySessionContext, - plan: PyPlan, - py: Python, - ) -> PyDataFusionResult { - let session_state = ctx.ctx.state(); - let result = consumer::from_substrait_plan(&session_state, &plan.plan); - let logical_plan = wait_for_future(py, result)??; - Ok(PyLogicalPlan::new(logical_plan)) - } -} - -pub fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} diff --git a/src/table.rs b/src/table.rs deleted file mode 100644 index b9f30af9c..000000000 --- a/src/table.rs +++ /dev/null @@ -1,208 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::any::Any; -use std::sync::Arc; - -use arrow::datatypes::SchemaRef; -use arrow::pyarrow::ToPyArrow; -use async_trait::async_trait; -use datafusion::catalog::Session; -use datafusion::common::Column; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::logical_expr::{Expr, LogicalPlanBuilder, TableProviderFilterPushDown}; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::prelude::DataFrame; -use pyo3::IntoPyObjectExt; -use pyo3::prelude::*; - -use crate::context::PySessionContext; -use crate::dataframe::PyDataFrame; -use crate::dataset::Dataset; -use crate::utils::table_provider_from_pycapsule; - -/// This struct is used as a common method for all TableProviders, -/// whether they refer to an FFI provider, an internally known -/// implementation, a dataset, or a dataframe view. -#[pyclass( - from_py_object, - frozen, - name = "RawTable", - module = "datafusion.catalog", - subclass -)] -#[derive(Clone)] -pub struct PyTable { - pub table: Arc, -} - -impl PyTable { - pub fn table(&self) -> Arc { - self.table.clone() - } -} - -#[pymethods] -impl PyTable { - /// Instantiate from any Python object that supports any of the table - /// types. We do not know a priori when using this method if the object - /// will be passed a wrapped or raw class. 
Here we handle all of the - /// following object types: - /// - /// - PyTable (essentially a clone operation), but either raw or wrapped - /// - DataFrame, either raw or wrapped - /// - FFI Table Providers via PyCapsule - /// - PyArrow Dataset objects - #[new] - pub fn new(obj: Bound<'_, PyAny>, session: Option>) -> PyResult { - let py = obj.py(); - if let Ok(py_table) = obj.extract::() { - Ok(py_table) - } else if let Ok(py_table) = obj - .getattr("_inner") - .and_then(|inner| inner.extract::().map_err(Into::::into)) - { - Ok(py_table) - } else if let Ok(py_df) = obj.extract::() { - let provider = py_df.inner_df().as_ref().clone().into_view(); - Ok(PyTable::from(provider)) - } else if let Ok(py_df) = obj - .getattr("df") - .and_then(|inner| inner.extract::().map_err(Into::::into)) - { - let provider = py_df.inner_df().as_ref().clone().into_view(); - Ok(PyTable::from(provider)) - } else if let Some(provider) = { - let session = match session { - Some(session) => session, - None => PySessionContext::global_ctx()?.into_bound_py_any(obj.py())?, - }; - table_provider_from_pycapsule(obj.clone(), session)? - } { - Ok(PyTable::from(provider)) - } else { - let provider = Arc::new(Dataset::new(&obj, py)?) as Arc; - Ok(PyTable::from(provider)) - } - } - - /// Get a reference to the schema for this table - #[getter] - fn schema<'py>(&self, py: Python<'py>) -> PyResult> { - self.table.schema().to_pyarrow(py) - } - - /// Get the type of this table for metadata/catalog purposes. 
- #[getter] - fn kind(&self) -> &str { - match self.table.table_type() { - TableType::Base => "physical", - TableType::View => "view", - TableType::Temporary => "temporary", - } - } - - fn __repr__(&self) -> PyResult { - let kind = self.kind(); - Ok(format!("Table(kind={kind})")) - } -} - -impl From> for PyTable { - fn from(table: Arc) -> Self { - Self { table } - } -} - -#[derive(Clone, Debug)] -pub(crate) struct TempViewTable { - df: Arc, -} - -/// This is nearly identical to `DataFrameTableProvider` -/// except that it is for temporary tables. -/// Remove when https://github.com/apache/datafusion/issues/18026 -/// closes. -impl TempViewTable { - pub(crate) fn new(df: Arc) -> Self { - Self { df } - } -} - -#[async_trait] -impl TableProvider for TempViewTable { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - Arc::new(self.df.schema().as_arrow().clone()) - } - - fn table_type(&self) -> TableType { - TableType::Temporary - } - - async fn scan( - &self, - state: &dyn Session, - projection: Option<&Vec>, - filters: &[Expr], - limit: Option, - ) -> datafusion::common::Result> { - let filter = filters.iter().cloned().reduce(|acc, new| acc.and(new)); - let plan = self.df.logical_plan().clone(); - let mut plan = LogicalPlanBuilder::from(plan); - - if let Some(filter) = filter { - plan = plan.filter(filter)?; - } - - let mut plan = if let Some(projection) = projection { - // avoiding adding a redundant projection (e.g. SELECT * FROM view) - let current_projection = (0..plan.schema().fields().len()).collect::>(); - if projection == ¤t_projection { - plan - } else { - let fields: Vec = projection - .iter() - .map(|i| { - Expr::Column(Column::from( - self.df.logical_plan().schema().qualified_field(*i), - )) - }) - .collect(); - plan.project(fields)? 
- } - } else { - plan - }; - - if let Some(limit) = limit { - plan = plan.limit(0, Some(limit))?; - } - - state.create_physical_plan(&plan.build()?).await - } - - fn supports_filters_pushdown( - &self, - filters: &[&Expr], - ) -> datafusion::common::Result> { - Ok(vec![TableProviderFilterPushDown::Exact; filters.len()]) - } -} diff --git a/src/udaf.rs b/src/udaf.rs deleted file mode 100644 index 7ba499c66..000000000 --- a/src/udaf.rs +++ /dev/null @@ -1,237 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::ptr::NonNull; -use std::sync::Arc; - -use datafusion::arrow::array::ArrayRef; -use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; -use datafusion::common::ScalarValue; -use datafusion::error::{DataFusionError, Result}; -use datafusion::logical_expr::{ - Accumulator, AccumulatorFactoryFunction, AggregateUDF, AggregateUDFImpl, create_udaf, -}; -use datafusion_ffi::udaf::FFI_AggregateUDF; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyTuple}; - -use crate::common::data_type::PyScalarValue; -use crate::errors::{PyDataFusionResult, py_datafusion_err, to_datafusion_err}; -use crate::expr::PyExpr; -use crate::utils::{parse_volatility, validate_pycapsule}; - -#[derive(Debug)] -struct RustAccumulator { - accum: Py, -} - -impl RustAccumulator { - fn new(accum: Py) -> Self { - Self { accum } - } -} - -impl Accumulator for RustAccumulator { - fn state(&mut self) -> Result> { - Python::attach(|py| -> PyResult> { - let values = self.accum.bind(py).call_method0("state")?; - let mut scalars = Vec::new(); - for item in values.try_iter()? { - let item: Bound<'_, PyAny> = item?; - let scalar = item.extract::()?.0; - scalars.push(scalar); - } - Ok(scalars) - }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - } - - fn evaluate(&mut self) -> Result { - Python::attach(|py| -> PyResult { - let value = self.accum.bind(py).call_method0("evaluate")?; - value.extract::().map(|v| v.0) - }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - } - - fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - Python::attach(|py| { - // 1. cast args to Pyarrow array - let py_args = values - .iter() - .map(|arg| arg.to_data().to_pyarrow(py).unwrap()) - .collect::>(); - let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; - - // 2. 
call function - self.accum - .bind(py) - .call_method1("update", py_args) - .map_err(|e| DataFusionError::Execution(format!("{e}")))?; - - Ok(()) - }) - } - - fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - Python::attach(|py| { - // // 1. cast states to Pyarrow arrays - let py_states: Result>> = states - .iter() - .map(|state| { - state - .to_data() - .to_pyarrow(py) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - }) - .collect(); - - // 2. call merge - self.accum - .bind(py) - .call_method1("merge", (py_states?,)) - .map_err(|e| DataFusionError::Execution(format!("{e}")))?; - - Ok(()) - }) - } - - fn size(&self) -> usize { - std::mem::size_of_val(self) - } - - fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - Python::attach(|py| { - // 1. cast args to Pyarrow array - let py_args = values - .iter() - .map(|arg| arg.to_data().to_pyarrow(py).unwrap()) - .collect::>(); - let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; - - // 2. call function - self.accum - .bind(py) - .call_method1("retract_batch", py_args) - .map_err(|e| DataFusionError::Execution(format!("{e}")))?; - - Ok(()) - }) - } - - fn supports_retract_batch(&self) -> bool { - Python::attach( - |py| match self.accum.bind(py).call_method0("supports_retract_batch") { - Ok(x) => x.extract().unwrap_or(false), - Err(_) => false, - }, - ) - } -} - -pub fn to_rust_accumulator(accum: Py) -> AccumulatorFactoryFunction { - Arc::new(move |_args| -> Result> { - let accum = Python::attach(|py| { - accum - .call0(py) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - })?; - Ok(Box::new(RustAccumulator::new(accum))) - }) -} - -fn aggregate_udf_from_capsule(capsule: &Bound<'_, PyCapsule>) -> PyDataFusionResult { - validate_pycapsule(capsule, "datafusion_aggregate_udf")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_aggregate_udf")))? 
- .cast(); - let udaf = unsafe { data.as_ref() }; - let udaf: Arc = udaf.into(); - - Ok(AggregateUDF::new_from_shared_impl(udaf)) -} - -/// Represents an AggregateUDF -#[pyclass( - from_py_object, - frozen, - name = "AggregateUDF", - module = "datafusion", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyAggregateUDF { - pub(crate) function: AggregateUDF, -} - -#[pymethods] -impl PyAggregateUDF { - #[new] - #[pyo3(signature=(name, accumulator, input_type, return_type, state_type, volatility))] - fn new( - name: &str, - accumulator: Py, - input_type: PyArrowType>, - return_type: PyArrowType, - state_type: PyArrowType>, - volatility: &str, - ) -> PyResult { - let function = create_udaf( - name, - input_type.0, - Arc::new(return_type.0), - parse_volatility(volatility)?, - to_rust_accumulator(accumulator), - Arc::new(state_type.0), - ); - Ok(Self { function }) - } - - #[staticmethod] - pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { - if func.is_instance_of::() { - let capsule = func.cast::().map_err(py_datafusion_err)?; - let function = aggregate_udf_from_capsule(capsule)?; - return Ok(Self { function }); - } - - if func.hasattr("__datafusion_aggregate_udf__")? 
{ - let capsule = func.getattr("__datafusion_aggregate_udf__")?.call0()?; - let capsule = capsule.cast::().map_err(py_datafusion_err)?; - let function = aggregate_udf_from_capsule(capsule)?; - return Ok(Self { function }); - } - - Err(crate::errors::PyDataFusionError::Common( - "__datafusion_aggregate_udf__ does not exist on AggregateUDF object.".to_string(), - )) - } - - /// creates a new PyExpr with the call of the udf - #[pyo3(signature = (*args))] - fn __call__(&self, args: Vec) -> PyResult { - let args = args.iter().map(|e| e.expr.clone()).collect(); - Ok(self.function.call(args).into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("AggregateUDF({})", self.function.name())) - } -} diff --git a/src/udf.rs b/src/udf.rs deleted file mode 100644 index 2d60abc09..000000000 --- a/src/udf.rs +++ /dev/null @@ -1,226 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::any::Any; -use std::hash::{Hash, Hasher}; -use std::ptr::NonNull; -use std::sync::Arc; - -use arrow::datatypes::{Field, FieldRef}; -use arrow::pyarrow::ToPyArrow; -use datafusion::arrow::array::{ArrayData, make_array}; -use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType}; -use datafusion::common::internal_err; -use datafusion::error::DataFusionError; -use datafusion::logical_expr::{ - ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, - Volatility, -}; -use datafusion_ffi::udf::FFI_ScalarUDF; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyTuple}; - -use crate::array::PyArrowArrayExportable; -use crate::errors::{PyDataFusionResult, py_datafusion_err, to_datafusion_err}; -use crate::expr::PyExpr; -use crate::utils::{parse_volatility, validate_pycapsule}; - -/// This struct holds the Python written function that is a -/// ScalarUDF. -#[derive(Debug)] -struct PythonFunctionScalarUDF { - name: String, - func: Py, - signature: Signature, - return_field: FieldRef, -} - -impl PythonFunctionScalarUDF { - fn new( - name: String, - func: Py, - input_fields: Vec, - return_field: Field, - volatility: Volatility, - ) -> Self { - let input_types = input_fields.iter().map(|f| f.data_type().clone()).collect(); - let signature = Signature::exact(input_types, volatility); - Self { - name, - func, - signature, - return_field: Arc::new(return_field), - } - } -} - -impl Eq for PythonFunctionScalarUDF {} -impl PartialEq for PythonFunctionScalarUDF { - fn eq(&self, other: &Self) -> bool { - self.name == other.name - && self.signature == other.signature - && self.return_field == other.return_field - && Python::attach(|py| self.func.bind(py).eq(other.func.bind(py)).unwrap_or(false)) - } -} - -impl Hash for PythonFunctionScalarUDF { - fn hash(&self, state: &mut H) { - self.name.hash(state); - self.signature.hash(state); - self.return_field.hash(state); - - 
Python::attach(|py| { - let py_hash = self.func.bind(py).hash().unwrap_or(0); // Handle unhashable objects - - state.write_isize(py_hash); - }); - } -} - -impl ScalarUDFImpl for PythonFunctionScalarUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - &self.name - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> datafusion::common::Result { - internal_err!( - "return_field should not be called when return_field_from_args is implemented." - ) - } - - fn return_field_from_args( - &self, - _args: ReturnFieldArgs, - ) -> datafusion::common::Result { - Ok(Arc::clone(&self.return_field)) - } - - fn invoke_with_args( - &self, - args: ScalarFunctionArgs, - ) -> datafusion::common::Result { - let num_rows = args.number_rows; - Python::attach(|py| { - // 1. cast args to Pyarrow arrays - let py_args = args - .args - .into_iter() - .zip(args.arg_fields) - .map(|(arg, field)| { - let array = arg.to_array(num_rows)?; - PyArrowArrayExportable::new(array, field) - .to_pyarrow(py) - .map_err(to_datafusion_err) - }) - .collect::, _>>()?; - let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; - - // 2. call function - let value = self - .func - .call(py, py_args, None) - .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; - - // 3. 
cast to arrow::array::Array - let array_data = ArrayData::from_pyarrow_bound(value.bind(py)) - .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; - Ok(ColumnarValue::Array(make_array(array_data))) - }) - } -} - -/// Represents a PyScalarUDF -#[pyclass( - from_py_object, - frozen, - name = "ScalarUDF", - module = "datafusion", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyScalarUDF { - pub(crate) function: ScalarUDF, -} - -#[pymethods] -impl PyScalarUDF { - #[new] - #[pyo3(signature=(name, func, input_types, return_type, volatility))] - fn new( - name: String, - func: Py, - input_types: PyArrowType>, - return_type: PyArrowType, - volatility: &str, - ) -> PyResult { - let py_function = PythonFunctionScalarUDF::new( - name, - func, - input_types.0, - return_type.0, - parse_volatility(volatility)?, - ); - let function = ScalarUDF::new_from_impl(py_function); - - Ok(Self { function }) - } - - #[staticmethod] - pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { - if func.hasattr("__datafusion_scalar_udf__")? { - let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?; - let capsule = capsule.cast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_scalar_udf")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_scalar_udf")))? 
- .cast(); - let udf = unsafe { data.as_ref() }; - let udf: Arc = udf.into(); - - Ok(Self { - function: ScalarUDF::new_from_shared_impl(udf), - }) - } else { - Err(crate::errors::PyDataFusionError::Common( - "__datafusion_scalar_udf__ does not exist on ScalarUDF object.".to_string(), - )) - } - } - - /// creates a new PyExpr with the call of the udf - #[pyo3(signature = (*args))] - fn __call__(&self, args: Vec) -> PyResult { - let args = args.iter().map(|e| e.expr.clone()).collect(); - Ok(self.function.call(args).into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("ScalarUDF({})", self.function.name())) - } -} diff --git a/src/udtf.rs b/src/udtf.rs deleted file mode 100644 index 24df93e2b..000000000 --- a/src/udtf.rs +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::ptr::NonNull; -use std::sync::Arc; - -use datafusion::catalog::{TableFunctionImpl, TableProvider}; -use datafusion::error::Result as DataFusionResult; -use datafusion::logical_expr::Expr; -use datafusion_ffi::udtf::FFI_TableFunction; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::{PyImportError, PyTypeError}; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyTuple, PyType}; - -use crate::context::PySessionContext; -use crate::errors::{py_datafusion_err, to_datafusion_err}; -use crate::expr::PyExpr; -use crate::table::PyTable; -use crate::utils::validate_pycapsule; - -/// Represents a user defined table function -#[pyclass(from_py_object, frozen, name = "TableFunction", module = "datafusion")] -#[derive(Debug, Clone)] -pub struct PyTableFunction { - pub(crate) name: String, - pub(crate) inner: PyTableFunctionInner, -} - -// TODO: Implement pure python based user defined table functions -#[derive(Debug, Clone)] -pub(crate) enum PyTableFunctionInner { - PythonFunction(Arc>), - FFIFunction(Arc), -} - -#[pymethods] -impl PyTableFunction { - #[new] - #[pyo3(signature=(name, func, session))] - pub fn new( - name: &str, - func: Bound<'_, PyAny>, - session: Option>, - ) -> PyResult { - let inner = if func.hasattr("__datafusion_table_function__")? { - let py = func.py(); - let session = match session { - Some(session) => session, - None => PySessionContext::global_ctx()?.into_bound_py_any(py)?, - }; - let capsule = func - .getattr("__datafusion_table_function__")? - .call1((session,)).map_err(|err| { - if err.get_type(py).is(PyType::new::(py)) { - PyImportError::new_err("Incompatible libraries. DataFusion 52.0.0 introduced an incompatible signature change for table functions. 
Either downgrade DataFusion or upgrade your function library.") - } else { - err - } - })?; - let capsule = capsule.cast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_table_function")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_table_function")))? - .cast(); - let ffi_func = unsafe { data.as_ref() }; - let foreign_func: Arc = ffi_func.to_owned().into(); - - PyTableFunctionInner::FFIFunction(foreign_func) - } else { - let py_obj = Arc::new(func.unbind()); - PyTableFunctionInner::PythonFunction(py_obj) - }; - - Ok(Self { - name: name.to_string(), - inner, - }) - } - - #[pyo3(signature = (*args))] - pub fn __call__(&self, args: Vec) -> PyResult { - let args: Vec = args.iter().map(|e| e.expr.clone()).collect(); - let table_provider = self.call(&args).map_err(py_datafusion_err)?; - - Ok(PyTable::from(table_provider)) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("TableUDF({})", self.name)) - } -} - -#[allow(clippy::result_large_err)] -fn call_python_table_function( - func: &Arc>, - args: &[Expr], -) -> DataFusionResult> { - let args = args - .iter() - .map(|arg| PyExpr::from(arg.clone())) - .collect::>(); - - // move |args: &[ArrayRef]| -> Result { - Python::attach(|py| { - let py_args = PyTuple::new(py, args)?; - let provider_obj = func.call1(py, py_args)?; - let provider = provider_obj.bind(py).clone(); - - Ok::, PyErr>(PyTable::new(provider, None)?.table) - }) - .map_err(to_datafusion_err) -} - -impl TableFunctionImpl for PyTableFunction { - fn call(&self, args: &[Expr]) -> DataFusionResult> { - match &self.inner { - PyTableFunctionInner::FFIFunction(func) => func.call(args), - PyTableFunctionInner::PythonFunction(obj) => call_python_table_function(obj, args), - } - } -} diff --git a/src/udwf.rs b/src/udwf.rs deleted file mode 100644 index de63e2f9a..000000000 --- a/src/udwf.rs +++ /dev/null @@ -1,348 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor 
license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::any::Any; -use std::ops::Range; -use std::ptr::NonNull; -use std::sync::Arc; - -use arrow::array::{Array, ArrayData, ArrayRef, make_array}; -use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; -use datafusion::error::{DataFusionError, Result}; -use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; -use datafusion::logical_expr::ptr_eq::PtrEq; -use datafusion::logical_expr::window_state::WindowAggState; -use datafusion::logical_expr::{ - PartitionEvaluator, PartitionEvaluatorFactory, Signature, Volatility, WindowUDF, WindowUDFImpl, -}; -use datafusion::scalar::ScalarValue; -use datafusion_ffi::udwf::FFI_WindowUDF; -use pyo3::exceptions::PyValueError; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyList, PyTuple}; - -use crate::common::data_type::PyScalarValue; -use crate::errors::{PyDataFusionResult, py_datafusion_err, to_datafusion_err}; -use crate::expr::PyExpr; -use crate::utils::{parse_volatility, validate_pycapsule}; - -#[derive(Debug)] -struct RustPartitionEvaluator { - evaluator: Py, -} - -impl RustPartitionEvaluator { - fn new(evaluator: Py) -> Self { - Self { evaluator } - } -} - -impl PartitionEvaluator for 
RustPartitionEvaluator { - fn memoize(&mut self, _state: &mut WindowAggState) -> Result<()> { - Python::attach(|py| self.evaluator.bind(py).call_method0("memoize").map(|_| ())) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - } - - fn get_range(&self, idx: usize, n_rows: usize) -> Result> { - Python::attach(|py| { - let py_args = vec![idx.into_pyobject(py)?, n_rows.into_pyobject(py)?]; - let py_args = PyTuple::new(py, py_args)?; - - self.evaluator - .bind(py) - .call_method1("get_range", py_args) - .and_then(|v| { - let tuple: Bound<'_, PyTuple> = v.extract()?; - if tuple.len() != 2 { - return Err(PyValueError::new_err(format!( - "Expected get_range to return tuple of length 2. Received length {}", - tuple.len() - ))); - } - - let start: usize = tuple.get_item(0).unwrap().extract()?; - let end: usize = tuple.get_item(1).unwrap().extract()?; - - Ok(Range { start, end }) - }) - }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) - } - - fn is_causal(&self) -> bool { - Python::attach(|py| { - self.evaluator - .bind(py) - .call_method0("is_causal") - .and_then(|v| v.extract()) - .unwrap_or(false) - }) - } - - fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { - Python::attach(|py| { - let py_values = PyList::new( - py, - values - .iter() - .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - )?; - let py_num_rows = num_rows.into_pyobject(py)?; - let py_args = PyTuple::new(py, vec![py_values.as_any(), &py_num_rows])?; - - self.evaluator - .bind(py) - .call_method1("evaluate_all", py_args) - .map(|v| { - let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); - make_array(array_data) - }) - }) - .map_err(to_datafusion_err) - } - - fn evaluate(&mut self, values: &[ArrayRef], range: &Range) -> Result { - Python::attach(|py| { - let py_values = PyList::new( - py, - values - .iter() - .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - )?; - let range_tuple = PyTuple::new(py, vec![range.start, range.end])?; - let 
py_args = PyTuple::new(py, vec![py_values.as_any(), range_tuple.as_any()])?; - - self.evaluator - .bind(py) - .call_method1("evaluate", py_args) - .and_then(|v| v.extract::()) - .map(|v| v.0) - }) - .map_err(to_datafusion_err) - } - - fn evaluate_all_with_rank( - &self, - num_rows: usize, - ranks_in_partition: &[Range], - ) -> Result { - Python::attach(|py| { - let ranks = ranks_in_partition - .iter() - .map(|r| PyTuple::new(py, vec![r.start, r.end])) - .collect::>>()?; - - // 1. cast args to Pyarrow array - let py_args = vec![ - num_rows.into_pyobject(py)?.into_any(), - PyList::new(py, ranks)?.into_any(), - ]; - - let py_args = PyTuple::new(py, py_args)?; - - // 2. call function - self.evaluator - .bind(py) - .call_method1("evaluate_all_with_rank", py_args) - .map(|v| { - let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); - make_array(array_data) - }) - }) - .map_err(to_datafusion_err) - } - - fn supports_bounded_execution(&self) -> bool { - Python::attach(|py| { - self.evaluator - .bind(py) - .call_method0("supports_bounded_execution") - .and_then(|v| v.extract()) - .unwrap_or(false) - }) - } - - fn uses_window_frame(&self) -> bool { - Python::attach(|py| { - self.evaluator - .bind(py) - .call_method0("uses_window_frame") - .and_then(|v| v.extract()) - .unwrap_or(false) - }) - } - - fn include_rank(&self) -> bool { - Python::attach(|py| { - self.evaluator - .bind(py) - .call_method0("include_rank") - .and_then(|v| v.extract()) - .unwrap_or(false) - }) - } -} - -pub fn to_rust_partition_evaluator(evaluator: Py) -> PartitionEvaluatorFactory { - Arc::new(move || -> Result> { - let evaluator = Python::attach(|py| { - evaluator - .call0(py) - .map_err(|e| DataFusionError::Execution(e.to_string())) - })?; - Ok(Box::new(RustPartitionEvaluator::new(evaluator))) - }) -} - -/// Represents an WindowUDF -#[pyclass( - from_py_object, - frozen, - name = "WindowUDF", - module = "datafusion", - subclass -)] -#[derive(Debug, Clone)] -pub struct PyWindowUDF { - 
pub(crate) function: WindowUDF, -} - -#[pymethods] -impl PyWindowUDF { - #[new] - #[pyo3(signature=(name, evaluator, input_types, return_type, volatility))] - fn new( - name: &str, - evaluator: Py, - input_types: Vec>, - return_type: PyArrowType, - volatility: &str, - ) -> PyResult { - let return_type = return_type.0; - let input_types = input_types.into_iter().map(|t| t.0).collect(); - - let function = WindowUDF::from(MultiColumnWindowUDF::new( - name, - input_types, - return_type, - parse_volatility(volatility)?, - to_rust_partition_evaluator(evaluator), - )); - Ok(Self { function }) - } - - /// creates a new PyExpr with the call of the udf - #[pyo3(signature = (*args))] - fn __call__(&self, args: Vec) -> PyResult { - let args = args.iter().map(|e| e.expr.clone()).collect(); - Ok(self.function.call(args).into()) - } - - #[staticmethod] - pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { - let capsule = if func.hasattr("__datafusion_window_udf__")? { - func.getattr("__datafusion_window_udf__")?.call0()? - } else { - func - }; - - let capsule = capsule.cast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_window_udf")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_window_udf")))? 
- .cast(); - let udwf = unsafe { data.as_ref() }; - let udwf: Arc = udwf.into(); - - Ok(Self { - function: WindowUDF::new_from_shared_impl(udwf), - }) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("WindowUDF({})", self.function.name())) - } -} - -#[derive(Hash, Eq, PartialEq)] -pub struct MultiColumnWindowUDF { - name: String, - signature: Signature, - return_type: DataType, - partition_evaluator_factory: PtrEq, -} - -impl std::fmt::Debug for MultiColumnWindowUDF { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("WindowUDF") - .field("name", &self.name) - .field("signature", &self.signature) - .field("return_type", &"") - .field("partition_evaluator_factory", &"") - .finish() - } -} - -impl MultiColumnWindowUDF { - pub fn new( - name: impl Into, - input_types: Vec, - return_type: DataType, - volatility: Volatility, - partition_evaluator_factory: PartitionEvaluatorFactory, - ) -> Self { - let name = name.into(); - let signature = Signature::exact(input_types, volatility); - Self { - name, - signature, - return_type, - partition_evaluator_factory: partition_evaluator_factory.into(), - } - } -} - -impl WindowUDFImpl for MultiColumnWindowUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - &self.name - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn field(&self, field_args: WindowUDFFieldArgs) -> Result { - // TODO: Should nullable always be `true`? - Ok(arrow::datatypes::Field::new(field_args.name(), self.return_type.clone(), true).into()) - } - - // TODO: Enable passing partition_evaluator_args to python? 
- fn partition_evaluator( - &self, - _partition_evaluator_args: PartitionEvaluatorArgs, - ) -> Result> { - let _ = _partition_evaluator_args; - (self.partition_evaluator_factory)() - } -} diff --git a/src/unparser/dialect.rs b/src/unparser/dialect.rs deleted file mode 100644 index 52a2da00b..000000000 --- a/src/unparser/dialect.rs +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::sync::Arc; - -use datafusion::sql::unparser::dialect::{ - DefaultDialect, Dialect, DuckDBDialect, MySqlDialect, PostgreSqlDialect, SqliteDialect, -}; -use pyo3::prelude::*; - -#[pyclass( - from_py_object, - frozen, - name = "Dialect", - module = "datafusion.unparser", - subclass -)] -#[derive(Clone)] -pub struct PyDialect { - pub dialect: Arc, -} - -#[pymethods] -impl PyDialect { - #[staticmethod] - pub fn default() -> Self { - Self { - dialect: Arc::new(DefaultDialect {}), - } - } - #[staticmethod] - pub fn postgres() -> Self { - Self { - dialect: Arc::new(PostgreSqlDialect {}), - } - } - #[staticmethod] - pub fn mysql() -> Self { - Self { - dialect: Arc::new(MySqlDialect {}), - } - } - #[staticmethod] - pub fn sqlite() -> Self { - Self { - dialect: Arc::new(SqliteDialect {}), - } - } - #[staticmethod] - pub fn duckdb() -> Self { - Self { - dialect: Arc::new(DuckDBDialect::new()), - } - } -} diff --git a/src/unparser/mod.rs b/src/unparser/mod.rs deleted file mode 100644 index 5142b918e..000000000 --- a/src/unparser/mod.rs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -mod dialect; - -use std::sync::Arc; - -use datafusion::sql::unparser::Unparser; -use datafusion::sql::unparser::dialect::Dialect; -use dialect::PyDialect; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; - -use crate::sql::logical::PyLogicalPlan; - -#[pyclass( - from_py_object, - frozen, - name = "Unparser", - module = "datafusion.unparser", - subclass -)] -#[derive(Clone)] -pub struct PyUnparser { - dialect: Arc, - pretty: bool, -} - -#[pymethods] -impl PyUnparser { - #[new] - pub fn new(dialect: PyDialect) -> Self { - Self { - dialect: dialect.dialect.clone(), - pretty: false, - } - } - - pub fn plan_to_sql(&self, plan: &PyLogicalPlan) -> PyResult { - let mut unparser = Unparser::new(self.dialect.as_ref()); - unparser = unparser.with_pretty(self.pretty); - let sql = unparser - .plan_to_sql(&plan.plan()) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - Ok(sql.to_string()) - } - - pub fn with_pretty(&self, pretty: bool) -> Self { - Self { - dialect: self.dialect.clone(), - pretty, - } - } -} - -pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - Ok(()) -} diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 5085018f7..000000000 --- a/src/utils.rs +++ /dev/null @@ -1,238 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::future::Future; -use std::ptr::NonNull; -use std::sync::{Arc, OnceLock}; -use std::time::Duration; - -use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionContext; -use datafusion::logical_expr::Volatility; -use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; -use datafusion_ffi::table_provider::FFI_TableProvider; -use pyo3::IntoPyObjectExt; -use pyo3::exceptions::{PyImportError, PyTypeError, PyValueError}; -use pyo3::ffi::c_str; -use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyType}; -use tokio::runtime::Runtime; -use tokio::task::JoinHandle; -use tokio::time::sleep; - -use crate::TokioRuntime; -use crate::context::PySessionContext; -use crate::errors::{PyDataFusionError, PyDataFusionResult, py_datafusion_err, to_datafusion_err}; - -/// Utility to get the Tokio Runtime from Python -#[inline] -pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { - // NOTE: Other pyo3 python libraries have had issues with using tokio - // behind a forking app-server like `gunicorn` - // If we run into that problem, in the future we can look to `delta-rs` - // which adds a check in that disallows calls from a forked process - // https://github.com/delta-io/delta-rs/blob/87010461cfe01563d91a4b9cd6fa468e2ad5f283/python/src/utils.rs#L10-L31 - static RUNTIME: OnceLock = OnceLock::new(); - RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap())) -} - -#[inline] -pub(crate) fn is_ipython_env(py: Python) -> &'static bool { - static IS_IPYTHON_ENV: OnceLock = 
OnceLock::new(); - IS_IPYTHON_ENV.get_or_init(|| { - py.import("IPython") - .and_then(|ipython| ipython.call_method0("get_ipython")) - .map(|ipython| !ipython.is_none()) - .unwrap_or(false) - }) -} - -/// Utility to get the Global Datafussion CTX -#[inline] -pub(crate) fn get_global_ctx() -> &'static Arc { - static CTX: OnceLock> = OnceLock::new(); - CTX.get_or_init(|| Arc::new(SessionContext::new())) -} - -/// Utility to collect rust futures with GIL released and respond to -/// Python interrupts such as ``KeyboardInterrupt``. If a signal is -/// received while the future is running, the future is aborted and the -/// corresponding Python exception is raised. -pub fn wait_for_future(py: Python, fut: F) -> PyResult -where - F: Future + Send, - F::Output: Send, -{ - let runtime: &Runtime = &get_tokio_runtime().0; - const INTERVAL_CHECK_SIGNALS: Duration = Duration::from_millis(1_000); - - // Some fast running processes that generate many `wait_for_future` calls like - // PartitionedDataFrameStreamReader::next require checking for interrupts early - py.run(cr"pass", None, None)?; - py.check_signals()?; - - py.detach(|| { - runtime.block_on(async { - tokio::pin!(fut); - loop { - tokio::select! { - res = &mut fut => break Ok(res), - _ = sleep(INTERVAL_CHECK_SIGNALS) => { - Python::attach(|py| { - // Execute a no-op Python statement to trigger signal processing. - // This is necessary because py.check_signals() alone doesn't - // actually check for signals - it only raises an exception if - // a signal was already set during a previous Python API call. - // Running even trivial Python code forces the interpreter to - // process any pending signals (like KeyboardInterrupt). - py.run(cr"pass", None, None)?; - py.check_signals() - })?; - } - } - } - }) - }) -} - -/// Spawn a [`Future`] on the Tokio runtime and wait for completion -/// while respecting Python signal handling. 
-pub(crate) fn spawn_future(py: Python, fut: F) -> PyDataFusionResult -where - F: Future> + Send + 'static, - T: Send + 'static, -{ - let rt = &get_tokio_runtime().0; - let handle: JoinHandle> = rt.spawn(fut); - // Wait for the join handle while respecting Python signal handling. - // We handle errors in two steps so `?` maps the error types correctly: - // 1) convert any Python-related error from `wait_for_future` into `PyDataFusionError` - // 2) convert any DataFusion error (inner result) into `PyDataFusionError` - let inner_result = wait_for_future(py, async { - // handle.await yields `Result, JoinError>` - // map JoinError into a DataFusion error so the async block returns - // `datafusion::common::Result` (i.e. Result) - match handle.await { - Ok(inner) => inner, - Err(join_err) => Err(to_datafusion_err(join_err)), - } - })?; // converts PyErr -> PyDataFusionError - - // `inner_result` is `datafusion::common::Result`; use `?` to convert - // the inner DataFusion error into `PyDataFusionError` via `From` and - // return the inner `T` on success. - Ok(inner_result?) -} - -pub(crate) fn parse_volatility(value: &str) -> PyDataFusionResult { - Ok(match value { - "immutable" => Volatility::Immutable, - "stable" => Volatility::Stable, - "volatile" => Volatility::Volatile, - value => { - return Err(PyDataFusionError::Common(format!( - "Unsupported volatility type: `{value}`, supported \ - values are: immutable, stable and volatile." - ))); - } - }) -} - -pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { - let capsule_name = capsule.name()?; - if capsule_name.is_none() { - return Err(PyValueError::new_err(format!( - "Expected {name} PyCapsule to have name set." - ))); - } - - let capsule_name = unsafe { capsule_name.unwrap().as_cstr().to_str()? 
}; - if capsule_name != name { - return Err(PyValueError::new_err(format!( - "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" - ))); - } - - Ok(()) -} - -pub(crate) fn table_provider_from_pycapsule<'py>( - mut obj: Bound<'py, PyAny>, - session: Bound<'py, PyAny>, -) -> PyResult>> { - if obj.hasattr("__datafusion_table_provider__")? { - obj = obj - .getattr("__datafusion_table_provider__")? - .call1((session,)).map_err(|err| { - let py = obj.py(); - if err.get_type(py).is(PyType::new::(py)) { - PyImportError::new_err("Incompatible libraries. DataFusion 52.0.0 introduced an incompatible signature change for table providers. Either downgrade DataFusion or upgrade your function library.") - } else { - err - } - })?; - } - - if let Ok(capsule) = obj.cast::().map_err(py_datafusion_err) { - validate_pycapsule(capsule, "datafusion_table_provider")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_table_provider")))? - .cast(); - let provider = unsafe { data.as_ref() }; - let provider: Arc = provider.into(); - - Ok(Some(provider)) - } else { - Ok(None) - } -} - -pub(crate) fn extract_logical_extension_codec( - py: Python, - obj: Option>, -) -> PyResult> { - let obj = match obj { - Some(obj) => obj, - None => PySessionContext::global_ctx()?.into_bound_py_any(py)?, - }; - let capsule = if obj.hasattr("__datafusion_logical_extension_codec__")? { - obj.getattr("__datafusion_logical_extension_codec__")? - .call0()? - } else { - obj - }; - let capsule = capsule.cast::().map_err(py_datafusion_err)?; - - validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; - - let data: NonNull = capsule - .pointer_checked(Some(c_str!("datafusion_logical_extension_codec")))? 
- .cast(); - let codec = unsafe { data.as_ref() }; - Ok(Arc::new(codec.clone())) -} - -pub(crate) fn create_logical_extension_capsule<'py>( - py: Python<'py>, - codec: &FFI_LogicalExtensionCodec, -) -> PyResult> { - let name = cr"datafusion_logical_extension_codec".into(); - let codec = codec.clone(); - - PyCapsule::new(py, codec, Some(name)) -} diff --git a/testing b/testing deleted file mode 160000 index 5bab2f264..000000000 --- a/testing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5bab2f264a23f5af68f69ea93d24ef1e8e77fc88 diff --git a/user-guide/basics.html b/user-guide/basics.html new file mode 100644 index 000000000..98bef4d88 --- /dev/null +++ b/user-guide/basics.html @@ -0,0 +1,607 @@ + + + + + + + + Concepts — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Concepts

+

In this section, we will cover a basic example to introduce a few key concepts. We will use the +2021 Yellow Taxi Trip Records (download), +from the TLC Trip Record Data.

+
In [1]: from datafusion import SessionContext, col, lit, functions as f
+
+In [2]: ctx = SessionContext()
+
+In [3]: df = ctx.read_parquet("yellow_tripdata_2021-01.parquet")
+
+In [4]: df = df.select(
+   ...:     "trip_distance",
+   ...:     col("total_amount").alias("total"),
+   ...:     (f.round(lit(100.0) * col("tip_amount") / col("total_amount"), lit(1))).alias("tip_percent"),
+   ...: )
+   ...: 
+
+In [5]: df.show()
+DataFrame()
++---------------+-------+-------------+
+| trip_distance | total | tip_percent |
++---------------+-------+-------------+
+| 2.1           | 11.8  | 0.0         |
+| 0.2           | 4.3   | 0.0         |
+| 14.7          | 51.95 | 16.7        |
+| 10.6          | 36.35 | 16.6        |
+| 4.94          | 24.36 | 16.7        |
+| 1.6           | 14.15 | 16.6        |
+| 4.1           | 17.3  | 0.0         |
+| 5.7           | 21.8  | 0.0         |
+| 9.1           | 28.8  | 0.0         |
+| 2.7           | 18.95 | 16.6        |
+| 6.11          | 24.3  | 0.0         |
+| 1.21          | 10.79 | 23.1        |
+| 7.4           | 33.92 | 0.0         |
+| 1.7           | 14.16 | 16.7        |
+| 0.81          | 8.3   | 0.0         |
+| 1.01          | 10.3  | 9.7         |
+| 0.73          | 12.09 | 23.1        |
+| 1.17          | 12.36 | 16.7        |
+| 0.78          | 9.96  | 16.7        |
+| 1.66          | 12.3  | 0.0         |
++---------------+-------+-------------+
+
+
+
+

Session Context

+

The first statement group creates a SessionContext.

+
# create a context
+ctx = datafusion.SessionContext()
+
+
+

A Session Context is the main interface for executing queries with DataFusion. It maintains the state +of the connection between a user and an instance of the DataFusion engine. Additionally, it provides +the following functionality:

+
    +
  • Create a DataFrame from a data source.

  • +
  • Register a data source as a table that can be referenced from a SQL query.

  • +
  • Execute a SQL query

  • +
+
+
+

DataFrame

+

The second statement group creates a DataFrame.

+
# Create a DataFrame from a file
+df = ctx.read_parquet("yellow_tripdata_2021-01.parquet")
+
+
+

A DataFrame refers to a (logical) set of rows that share the same column names, similar to a Pandas DataFrame. +DataFrames are typically created by calling a method on SessionContext, such as read_csv, and can then be modified by +calling the transformation methods, such as filter(), select(), aggregate(), +and limit() to build up a query definition.

+

For more details on working with DataFrames, including visualization options and conversion to other formats, see DataFrames.

+
+
+

Expressions

+

The third statement uses Expressions to build up a query definition. You can find +explanations for what the functions below do in the user documentation for +col(), lit(), round(), +and alias().

+
df = df.select(
+    "trip_distance",
+    col("total_amount").alias("total"),
+    (f.round(lit(100.0) * col("tip_amount") / col("total_amount"), lit(1))).alias("tip_percent"),
+)
+
+
+

Finally, the show() method converts the logical plan +represented by the DataFrame into a physical plan and executes it, collecting all results and +displaying them to the user. It is important to note that DataFusion performs lazy evaluation +of the DataFrame. Until you call a method such as show() +or collect(), DataFusion will not perform the query.

+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/aggregations.html b/user-guide/common-operations/aggregations.html new file mode 100644 index 000000000..d2b473a61 --- /dev/null +++ b/user-guide/common-operations/aggregations.html @@ -0,0 +1,927 @@ + + + + + + + + Aggregation — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + + + +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Aggregation

+

An aggregate or aggregation is a function where the values of multiple rows are processed together +to form a single summary value. To perform an aggregation, DataFusion provides the +aggregate() method.

+
In [1]: from datafusion import SessionContext, col, lit, functions as f
+
+In [2]: ctx = SessionContext()
+
+In [3]: df = ctx.read_csv("pokemon.csv")
+
+In [4]: col_type_1 = col('"Type 1"')
+
+In [5]: col_type_2 = col('"Type 2"')
+
+In [6]: col_speed = col('"Speed"')
+
+In [7]: col_attack = col('"Attack"')
+
+In [8]: df.aggregate([col_type_1], [
+   ...:     f.approx_distinct(col_speed).alias("Count"),
+   ...:     f.approx_median(col_speed).alias("Median Speed"),
+   ...:     f.approx_percentile_cont(col_speed, 0.9).alias("90% Speed")])
+   ...: 
+Out[8]: 
+DataFrame()
++----------+-------+--------------+-----------+
+| Type 1   | Count | Median Speed | 90% Speed |
++----------+-------+--------------+-----------+
+| Bug      | 11    | 63           | 107       |
+| Poison   | 12    | 55           | 85        |
+| Electric | 8     | 100          | 136       |
+| Fairy    | 2     | 47           | 60        |
+| Normal   | 20    | 71           | 110       |
+| Ice      | 2     | 90           | 95        |
+| Grass    | 8     | 55           | 80        |
+| Fire     | 8     | 91           | 100       |
+| Water    | 21    | 70           | 90        |
+| Ground   | 7     | 40           | 112       |
++----------+-------+--------------+-----------+
+Data truncated.
+
+
+

When the group_by list is empty, the aggregation is done over the whole DataFrame. +For grouping, the group_by list must contain at least one column.

+
In [9]: df.aggregate([col_type_1], [
+   ...:     f.max(col_speed).alias("Max Speed"),
+   ...:     f.avg(col_speed).alias("Avg Speed"),
+   ...:     f.min(col_speed).alias("Min Speed")])
+   ...: 
+Out[9]: 
+DataFrame()
++----------+-----------+--------------------+-----------+
+| Type 1   | Max Speed | Avg Speed          | Min Speed |
++----------+-----------+--------------------+-----------+
+| Bug      | 145       | 66.78571428571429  | 25        |
+| Poison   | 90        | 58.785714285714285 | 25        |
+| Electric | 140       | 98.88888888888889  | 45        |
+| Fairy    | 60        | 47.5               | 35        |
+| Normal   | 121       | 72.75              | 20        |
+| Ice      | 95        | 90.0               | 85        |
+| Grass    | 80        | 54.23076923076923  | 30        |
+| Fire     | 105       | 86.28571428571429  | 60        |
+| Water    | 115       | 67.25806451612904  | 15        |
+| Ground   | 120       | 58.125             | 25        |
++----------+-----------+--------------------+-----------+
+Data truncated.
+
+
+

More than one column can be used for grouping

+
In [10]: df.aggregate([col_type_1, col_type_2], [
+   ....:     f.max(col_speed).alias("Max Speed"),
+   ....:     f.avg(col_speed).alias("Avg Speed"),
+   ....:     f.min(col_speed).alias("Min Speed")])
+   ....: 
+Out[10]: 
+DataFrame()
++----------+---------+-----------+--------------------+-----------+
+| Type 1   | Type 2  | Max Speed | Avg Speed          | Min Speed |
++----------+---------+-----------+--------------------+-----------+
+| Bug      |         | 85        | 53.333333333333336 | 30        |
+| Normal   | Flying  | 121       | 83.77777777777777  | 56        |
+| Poison   |         | 80        | 51.7               | 25        |
+| Electric |         | 140       | 112.5              | 90        |
+| Fairy    |         | 60        | 47.5               | 35        |
+| Water    | Ice     | 70        | 66.66666666666667  | 60        |
+| Ice      | Psychic | 95        | 95.0               | 95        |
+| Ice      | Flying  | 85        | 85.0               | 85        |
+| Fire     | Flying  | 100       | 96.66666666666667  | 90        |
+| Fire     | Dragon  | 100       | 100.0              | 100       |
++----------+---------+-----------+--------------------+-----------+
+Data truncated.
+
+
+
+

Setting Parameters

+

Each of the built-in aggregate functions provides arguments for the parameters that affect its +operation. These can also be overridden using the builder approach, which can set any of the following +parameters. When you use the builder, you must call build() to finish. For example, these two +expressions are equivalent.

+
In [11]: first_1 = f.first_value(col("a"), order_by=[col("a")])
+
+In [12]: first_2 = f.first_value(col("a")).order_by(col("a")).build()
+
+
+
+

Ordering

+

You can control the order in which rows are processed by aggregate functions by providing +a list of sort expressions for the order_by parameter. In the following example, we +sort the Pokemon by their attack in increasing order and take the first value, which gives us the +Pokemon with the smallest attack value in each Type 1.

+
In [13]: df.aggregate(
+   ....:     [col('"Type 1"')],
+   ....:     [f.first_value(
+   ....:         col('"Name"'),
+   ....:         order_by=[col('"Attack"').sort(ascending=True)]
+   ....:         ).alias("Smallest Attack")
+   ....:     ])
+   ....: 
+Out[13]: 
+DataFrame()
++----------+-----------------+
+| Type 1   | Smallest Attack |
++----------+-----------------+
+| Bug      | Metapod         |
+| Poison   | Zubat           |
+| Electric | Voltorb         |
+| Fairy    | Clefairy        |
+| Normal   | Chansey         |
+| Ice      | Jynx            |
+| Grass    | Exeggcute       |
+| Fire     | Vulpix          |
+| Water    | Magikarp        |
+| Ground   | Cubone          |
++----------+-----------------+
+Data truncated.
+
+
+
+
+

Distinct

+

When you set the parameter distinct to True, each unique value is evaluated only one +time. Suppose we want to create an array of all of the Type 2 for each Type 1 of our +Pokemon set. Since there will be many repeated entries of Type 2, we only want each distinct value once.

+
In [14]: df.aggregate([col_type_1], [f.array_agg(col_type_2, distinct=True).alias("Type 2 List")])
+Out[14]: 
+DataFrame()
++----------+--------------------------------------------------+
+| Type 1   | Type 2 List                                      |
++----------+--------------------------------------------------+
+| Bug      | [Flying, Poison, , Grass]                        |
+| Poison   | [Flying, , Ground]                               |
+| Electric | [Flying, , Steel]                                |
+| Fairy    | []                                               |
+| Normal   | [Fairy, , Flying]                                |
+| Ice      | [Flying, Psychic]                                |
+| Grass    | [, Poison, Psychic]                              |
+| Fire     | [, Dragon, Flying]                               |
+| Water    | [Dark, Flying, Psychic, Poison, Ice, Fighting, ] |
+| Ground   | [Rock, ]                                         |
++----------+--------------------------------------------------+
+Data truncated.
+
+
+

In the output of the above we can see that there are some Type 1 for which the Type 2 entry +is null. In reality, we probably want to filter those out. We can do this in two ways. First, +we can filter out DataFrame rows that have no Type 2. If we do this, we might have some Type 1 +entries entirely removed. Second, we can use the filter argument described below.

+
In [15]: df.filter(col_type_2.is_not_null()).aggregate([col_type_1], [f.array_agg(col_type_2, distinct=True).alias("Type 2 List")])
+Out[15]: 
+DataFrame()
++----------+------------------------------------------------+
+| Type 1   | Type 2 List                                    |
++----------+------------------------------------------------+
+| Bug      | [Flying, Grass, Poison]                        |
+| Poison   | [Flying, Ground]                               |
+| Electric | [Steel, Flying]                                |
+| Normal   | [Flying, Fairy]                                |
+| Ice      | [Psychic, Flying]                              |
+| Grass    | [Poison, Psychic]                              |
+| Fire     | [Flying, Dragon]                               |
+| Water    | [Psychic, Flying, Poison, Ice, Dark, Fighting] |
+| Rock     | [Ground, Water, Flying]                        |
+| Ghost    | [Poison]                                       |
++----------+------------------------------------------------+
+Data truncated.
+
+In [16]: df.aggregate([col_type_1], [f.array_agg(col_type_2, distinct=True, filter=col_type_2.is_not_null()).alias("Type 2 List")])
+Out[16]: 
+DataFrame()
++----------+------------------------------------------------+
+| Type 1   | Type 2 List                                    |
++----------+------------------------------------------------+
+| Bug      | [Flying, Grass, Poison]                        |
+| Poison   | [Ground, Flying]                               |
+| Electric | [Steel, Flying]                                |
+| Fairy    |                                                |
+| Normal   | [Flying, Fairy]                                |
+| Ice      | [Flying, Psychic]                              |
+| Grass    | [Poison, Psychic]                              |
+| Fire     | [Dragon, Flying]                               |
+| Water    | [Fighting, Psychic, Ice, Flying, Poison, Dark] |
+| Ground   | [Rock]                                         |
++----------+------------------------------------------------+
+Data truncated.
+
+
+

Which approach you take should depend on your use case.

+
+
+

Null Treatment

+

This option allows you to either respect or ignore null values.

+

One common usage for handling nulls is the case where you want to find the first value within a +partition. By setting the null treatment to ignore nulls, we can find the first non-null value +in our partition.

+
In [17]: from datafusion.common import NullTreatment
+
+In [18]: df.aggregate([col_type_1], [
+   ....:     f.first_value(
+   ....:         col_type_2,
+   ....:         order_by=[col_attack],
+   ....:         null_treatment=NullTreatment.RESPECT_NULLS
+   ....:     ).alias("Lowest Attack Type 2")])
+   ....: 
+Out[18]: 
+DataFrame()
++----------+----------------------+
+| Type 1   | Lowest Attack Type 2 |
++----------+----------------------+
+| Bug      |                      |
+| Poison   | Flying               |
+| Electric |                      |
+| Fairy    |                      |
+| Normal   |                      |
+| Ice      | Psychic              |
+| Grass    | Psychic              |
+| Fire     |                      |
+| Water    |                      |
+| Ground   |                      |
++----------+----------------------+
+Data truncated.
+
+In [19]: df.aggregate([col_type_1], [
+   ....:     f.first_value(
+   ....:         col_type_2,
+   ....:         order_by=[col_attack],
+   ....:         null_treatment=NullTreatment.IGNORE_NULLS
+   ....:     ).alias("Lowest Attack Type 2")])
+   ....: 
+Out[19]: 
+DataFrame()
++----------+----------------------+
+| Type 1   | Lowest Attack Type 2 |
++----------+----------------------+
+| Bug      | Poison               |
+| Poison   | Flying               |
+| Electric | Steel                |
+| Fairy    |                      |
+| Normal   | Flying               |
+| Ice      | Psychic              |
+| Grass    | Psychic              |
+| Fire     | Flying               |
+| Water    | Poison               |
+| Ground   | Rock                 |
++----------+----------------------+
+Data truncated.
+
+
+
+
+

Filter

+

Using the filter option is useful for filtering results to include in the aggregate function. The +example above shows how this can be used to filter only the rows evaluated by the +aggregate function without filtering rows from the entire DataFrame.

+

Filter takes a single expression.

+

Suppose we want to find the speed values for only Pokemon that have low Attack values.

+
In [20]: df.aggregate([col_type_1], [
+   ....:     f.avg(col_speed).alias("Avg Speed All"),
+   ....:     f.avg(col_speed, filter=col_attack < lit(50)).alias("Avg Speed Low Attack")])
+   ....: 
+Out[20]: 
+DataFrame()
++----------+--------------------+----------------------+
+| Type 1   | Avg Speed All      | Avg Speed Low Attack |
++----------+--------------------+----------------------+
+| Bug      | 66.78571428571429  | 46.0                 |
+| Poison   | 58.785714285714285 | 48.0                 |
+| Electric | 98.88888888888889  | 72.5                 |
+| Fairy    | 47.5               | 35.0                 |
+| Normal   | 72.75              | 52.8                 |
+| Ice      | 90.0               |                      |
+| Grass    | 54.23076923076923  | 42.5                 |
+| Fire     | 86.28571428571429  | 65.0                 |
+| Water    | 67.25806451612904  | 63.833333333333336   |
+| Ground   | 58.125             |                      |
++----------+--------------------+----------------------+
+Data truncated.
+
+
+
+
+
+

Aggregate Functions

+

The available aggregate functions are:

+
    +
  1. +
    Comparison Functions
    +
    +
    +
  2. +
  3. +
    Math Functions
    +
    +
    +
  4. +
  5. +
    Array Functions
    +
    +
    +
  6. +
  7. +
    Logical Functions
    +
    +
    +
  8. +
  9. +
    Statistical Functions
    +
    +
    +
  10. +
  11. +
    Linear Regression Functions
    +
    +
    +
  12. +
  13. +
    Positional Functions
    +
    +
    +
  14. +
  15. +
    String Functions
    +
    +
    +
  16. +
  17. +
    Approximation Functions
    +
    +
    +
  18. +
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/basic-info.html b/user-guide/common-operations/basic-info.html new file mode 100644 index 000000000..7ae87351f --- /dev/null +++ b/user-guide/common-operations/basic-info.html @@ -0,0 +1,574 @@ + + + + + + + + Basic Operations — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Basic Operations

+

In this section, you will learn how to display essential details of DataFrames using specific functions.

+
In [1]: from datafusion import SessionContext
+
+In [2]: import random
+
+In [3]: ctx = SessionContext()
+
+In [4]: df = ctx.from_pydict({
+   ...:     "nrs": [1, 2, 3, 4, 5],
+   ...:     "names": ["python", "ruby", "java", "haskell", "go"],
+   ...:     "random": random.sample(range(1000), 5),
+   ...:     "groups": ["A", "A", "B", "C", "B"],
+   ...: })
+   ...: 
+
+In [5]: df
+Out[5]: 
+DataFrame()
++-----+---------+--------+--------+
+| nrs | names   | random | groups |
++-----+---------+--------+--------+
+| 1   | python  | 23     | A      |
+| 2   | ruby    | 939    | A      |
+| 3   | java    | 615    | B      |
+| 4   | haskell | 887    | C      |
+| 5   | go      | 382    | B      |
++-----+---------+--------+--------+
+
+
+

Use limit() to view the top rows of the frame:

+
In [6]: df.limit(2)
+Out[6]: 
+DataFrame()
++-----+--------+--------+--------+
+| nrs | names  | random | groups |
++-----+--------+--------+--------+
+| 1   | python | 23     | A      |
+| 2   | ruby   | 939    | A      |
++-----+--------+--------+--------+
+
+
+

Display the columns of the DataFrame using schema():

+
In [7]: df.schema()
+Out[7]: 
+nrs: int64
+names: string
+random: int64
+groups: string
+
+
+

The method to_pandas() uses pyarrow to convert to pandas DataFrame, by collecting the batches, +passing them to an Arrow table, and then converting them to a pandas DataFrame.

+
In [8]: df.to_pandas()
+Out[8]: 
+   nrs    names  random groups
+0    1   python      23      A
+1    2     ruby     939      A
+2    3     java     615      B
+3    4  haskell     887      C
+4    5       go     382      B
+
+
+

describe() shows a quick statistical summary of your data:

+
In [9]: df.describe()
+Out[9]: 
+DataFrame()
++------------+--------------------+-------+-------------------+--------+
+| describe   | nrs                | names | random            | groups |
++------------+--------------------+-------+-------------------+--------+
+| count      | 5.0                | 5     | 5.0               | 5      |
+| null_count | 0.0                | 0     | 0.0               | 0      |
+| mean       | 3.0                | null  | 569.2             | null   |
+| std        | 1.5811388300841898 | null  | 378.5580536720887 | null   |
+| min        | 1.0                | go    | 23.0              | A      |
+| max        | 5.0                | ruby  | 939.0             | C      |
+| median     | 3.0                | null  | 615.0             | null   |
++------------+--------------------+-------+-------------------+--------+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/expressions.html b/user-guide/common-operations/expressions.html new file mode 100644 index 000000000..cbfda5390 --- /dev/null +++ b/user-guide/common-operations/expressions.html @@ -0,0 +1,764 @@ + + + + + + + + Expressions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Expressions

+

In DataFusion an expression is an abstraction that represents a computation. +Expressions are used as the primary inputs and outputs for most functions within +DataFusion. As such, expressions can be combined to create expression trees, a +concept shared across most compilers and databases.

+
+

Column

+

The first expression most new users will interact with is the Column, which is created by calling col(). +This expression represents a column within a DataFrame. The function col() takes as its input a string +and returns an expression as its output.

+
+
+

Literal

+

Literal expressions represent a single value. These are helpful in a wide range of operations where +a specific, known value is of interest. You can create a literal expression using the function lit(). +The type of the object passed to the lit() function will be used to convert it to a known data type.

+

In the following example we create expressions for the column named color and the literal scalar string red. +The resultant variable red_units is itself also an expression.

+
In [1]: red_units = col("color") == lit("red")
+
+
+
+
+

Boolean

+

When combining expressions that evaluate to a boolean value, you can combine these expressions using boolean operators. +It is important to note that in order to combine these expressions, you must use bitwise operators. See the following +examples for the and, or, and not operations.

+
In [2]: red_or_green_units = (col("color") == lit("red")) | (col("color") == lit("green"))
+
+In [3]: heavy_red_units = (col("color") == lit("red")) & (col("weight") > lit(42))
+
+In [4]: not_red_units = ~(col("color") == lit("red"))
+
+
+
+
+

Arrays

+

For columns that contain arrays of values, you can access individual elements of the array by index +using bracket indexing. This is similar to calling the function +datafusion.functions.array_element(), except that array indexing using brackets is 0-based, +similar to Python lists, whereas array_element() uses 1-based indexing to be compatible with other SQL +approaches.

+
In [5]: from datafusion import SessionContext, col
+
+In [6]: ctx = SessionContext()
+
+In [7]: df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5, 6]]})
+
+In [8]: df.select(col("a")[0].alias("a0"))
+Out[8]: 
+DataFrame()
++----+
+| a0 |
++----+
+| 1  |
+| 4  |
++----+
+
+
+
+

Warning

+

Indexing an element of an array via [] starts at index 0 whereas +array_element() starts at index 1.

+
+

Starting in DataFusion 49.0.0 you can also create slices of array elements using +slice syntax from Python.

+
In [9]: df.select(col("a")[1:3].alias("second_two_elements"))
+Out[9]: 
+DataFrame()
++---------------------+
+| second_two_elements |
++---------------------+
+| [2, 3]              |
+| [5, 6]              |
++---------------------+
+
+
+

To check if an array is empty, you can use the function datafusion.functions.array_empty() or datafusion.functions.empty(). +Either function returns a boolean indicating whether the array is empty.

+
In [10]: from datafusion import SessionContext, col
+
+In [11]: from datafusion.functions import array_empty
+
+In [12]: ctx = SessionContext()
+
+In [13]: df = ctx.from_pydict({"a": [[], [1, 2, 3]]})
+
+In [14]: df.select(array_empty(col("a")).alias("is_empty"))
+Out[14]: 
+DataFrame()
++----------+
+| is_empty |
++----------+
+| true     |
+| false    |
++----------+
+
+
+

In this example, the is_empty column will contain True for the first row and False for the second row.

+

To get the total number of elements in an array, you can use the function datafusion.functions.cardinality(). +This function returns an integer indicating the total number of elements in the array.

+
In [15]: from datafusion import SessionContext, col
+
+In [16]: from datafusion.functions import cardinality
+
+In [17]: ctx = SessionContext()
+
+In [18]: df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5, 6]]})
+
+In [19]: df.select(cardinality(col("a")).alias("num_elements"))
+Out[19]: 
+DataFrame()
++--------------+
+| num_elements |
++--------------+
+| 3            |
+| 3            |
++--------------+
+
+
+

In this example, the num_elements column will contain 3 for both rows.

+

To concatenate two arrays, you can use the function datafusion.functions.array_cat() or datafusion.functions.array_concat(). +These functions return a new array that is the concatenation of the input arrays.

+
In [20]: from datafusion import SessionContext, col
+
+In [21]: from datafusion.functions import array_cat, array_concat
+
+In [22]: ctx = SessionContext()
+
+In [23]: df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[4, 5, 6]]})
+
+In [24]: df.select(array_cat(col("a"), col("b")).alias("concatenated_array"))
+Out[24]: 
+DataFrame()
++--------------------+
+| concatenated_array |
++--------------------+
+| [1, 2, 3, 4, 5, 6] |
++--------------------+
+
+
+

In this example, the concatenated_array column will contain [1, 2, 3, 4, 5, 6].

+

To repeat the elements of an array a specified number of times, you can use the function datafusion.functions.array_repeat(). +This function returns a new array with the elements repeated.

+
In [25]: from datafusion import SessionContext, col, literal
+
+In [26]: from datafusion.functions import array_repeat
+
+In [27]: ctx = SessionContext()
+
+In [28]: df = ctx.from_pydict({"a": [[1, 2, 3]]})
+
+In [29]: df.select(array_repeat(col("a"), literal(2)).alias("repeated_array"))
+Out[29]: 
+DataFrame()
++------------------------+
+| repeated_array         |
++------------------------+
+| [[1, 2, 3], [1, 2, 3]] |
++------------------------+
+
+
+

In this example, the repeated_array column will contain [[1, 2, 3], [1, 2, 3]].

+
+
+

Structs

+

Columns that contain struct elements can be accessed using the bracket notation as if they were +Python dictionary style objects. This expects a string key as the parameter passed.

+
In [30]: ctx = SessionContext()
+
+In [31]: data = {"a": [{"size": 15, "color": "green"}, {"size": 10, "color": "blue"}]}
+
+In [32]: df = ctx.from_pydict(data)
+
+In [33]: df.select(col("a")["size"].alias("a_size"))
+Out[33]: 
+DataFrame()
++--------+
+| a_size |
++--------+
+| 15     |
+| 10     |
++--------+
+
+
+
+
+

Functions

+

As mentioned before, most functions in DataFusion return an expression as their output. This allows us to create +a wide variety of expressions built up from other expressions. For example, alias() is a function that takes +as its input a single expression and returns an expression in which the name of the expression has changed.

+

The following example shows a series of expressions that are built up from functions operating on expressions.

+
In [34]: from datafusion import SessionContext
+
+In [35]: from datafusion import column, lit
+
+In [36]: from datafusion import functions as f
+
+In [37]: import random
+
+In [38]: ctx = SessionContext()
+
+In [39]: df = ctx.from_pydict(
+   ....:     {
+   ....:         "name": ["Albert", "Becca", "Carlos", "Dante"],
+   ....:         "age": [42, 67, 27, 71],
+   ....:         "years_in_position": [13, 21, 10, 54],
+   ....:     },
+   ....:     name="employees"
+   ....: )
+   ....: 
+
+In [40]: age_col = col("age")
+
+In [41]: renamed_age = age_col.alias("age_in_years")
+
+In [42]: start_age = age_col - col("years_in_position")
+
+In [43]: started_young = start_age < lit(18)
+
+In [44]: can_retire = age_col > lit(65)
+
+In [45]: long_timer = started_young & can_retire
+
+In [46]: df.filter(long_timer).select(col("name"), renamed_age, col("years_in_position"))
+Out[46]: 
+DataFrame()
++-------+--------------+-------------------+
+| name  | age_in_years | years_in_position |
++-------+--------------+-------------------+
+| Dante | 71           | 54                |
++-------+--------------+-------------------+
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/functions.html b/user-guide/common-operations/functions.html new file mode 100644 index 000000000..1afdcb7ef --- /dev/null +++ b/user-guide/common-operations/functions.html @@ -0,0 +1,822 @@ + + + + + + + + Functions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + + + +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Functions

+

DataFusion provides a large number of built-in functions for performing complex queries without requiring user-defined functions. +Here we will cover some of the more popular use cases. If you want to view all the functions go to the Functions API Reference.

+

We’ll use the pokemon dataset in the following examples.

+
In [1]: from datafusion import SessionContext
+
+In [2]: ctx = SessionContext()
+
+In [3]: ctx.register_csv("pokemon", "pokemon.csv")
+
+In [4]: df = ctx.table("pokemon")
+
+
+
+

Mathematical

+

DataFusion offers mathematical functions such as pow() or log()

+
In [5]: from datafusion import col, literal, string_literal, str_lit
+
+In [6]: from datafusion import functions as f
+
+In [7]: df.select(
+   ...:     f.pow(col('"Attack"'), literal(2)) - f.pow(col('"Defense"'), literal(2))
+   ...: ).limit(10)
+   ...: 
+Out[7]: 
+DataFrame()
++------------------------------------------------------------------+
+| power(pokemon.Attack,Int64(2)) - power(pokemon.Defense,Int64(2)) |
++------------------------------------------------------------------+
+| 0.0                                                              |
+| -125.0                                                           |
+| -165.0                                                           |
+| -5129.0                                                          |
+| 855.0                                                            |
+| 732.0                                                            |
+| 972.0                                                            |
+| 4579.0                                                           |
+| 4732.0                                                           |
+| -1921.0                                                          |
++------------------------------------------------------------------+
+
+
+
+
+

Conditional

+

There are 3 conditional functions in DataFusion: coalesce(), nullif() and case().

+
In [8]: df.select(
+   ...:     f.coalesce(col('"Type 1"'), col('"Type 2"')).alias("dominant_type")
+   ...: ).limit(10)
+   ...: 
+Out[8]: 
+DataFrame()
++---------------+
+| dominant_type |
++---------------+
+| Grass         |
+| Grass         |
+| Grass         |
+| Grass         |
+| Fire          |
+| Fire          |
+| Fire          |
+| Fire          |
+| Fire          |
+| Water         |
++---------------+
+
+
+
+
+

Temporal

+

For selecting the current time use now()

+
In [9]: df.select(f.now())
+Out[9]: 
+DataFrame()
++-------------------------------+
+| now()                         |
++-------------------------------+
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
+| 2026-03-12T09:56:53.757149654 |
++-------------------------------+
+Data truncated.
+
+
+

Convert to timestamps using to_timestamp()

+
In [10]: df.select(f.to_timestamp(col('"Total"')).alias("timestamp"))
+Out[10]: 
+DataFrame()
++---------------------+
+| timestamp           |
++---------------------+
+| 1970-01-01T00:05:18 |
+| 1970-01-01T00:06:45 |
+| 1970-01-01T00:08:45 |
+| 1970-01-01T00:10:25 |
+| 1970-01-01T00:05:09 |
+| 1970-01-01T00:06:45 |
+| 1970-01-01T00:08:54 |
+| 1970-01-01T00:10:34 |
+| 1970-01-01T00:10:34 |
+| 1970-01-01T00:05:14 |
++---------------------+
+Data truncated.
+
+
+

Extracting parts of a date using date_part() (alias extract())

+
In [11]: df.select(
+   ....:    f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"),
+   ....:    f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day")
+   ....: )
+   ....: 
+Out[11]: 
+DataFrame()
++-------+-----+
+| month | day |
++-------+-----+
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
+| 1     | 1   |
++-------+-----+
+Data truncated.
+
+
+
+
+

String

+

In the field of data science, working with textual data is a common task. To make string manipulation easier, +DataFusion offers a range of helpful options.

+
In [12]: df.select(
+   ....:     f.char_length(col('"Name"')).alias("len"),
+   ....:     f.lower(col('"Name"')).alias("lower"),
+   ....:     f.left(col('"Name"'), literal(4)).alias("code")
+   ....: )
+   ....: 
+Out[12]: 
+DataFrame()
++-----+---------------------------+------+
+| len | lower                     | code |
++-----+---------------------------+------+
+| 9   | bulbasaur                 | Bulb |
+| 7   | ivysaur                   | Ivys |
+| 8   | venusaur                  | Venu |
+| 21  | venusaurmega venusaur     | Venu |
+| 10  | charmander                | Char |
+| 10  | charmeleon                | Char |
+| 9   | charizard                 | Char |
+| 25  | charizardmega charizard x | Char |
+| 25  | charizardmega charizard y | Char |
+| 8   | squirtle                  | Squi |
++-----+---------------------------+------+
+Data truncated.
+
+
+

This also includes the functions for regular expressions like regexp_replace() and regexp_match()

+
In [13]: df.select(
+   ....:     f.regexp_match(col('"Name"'), literal("Char")).alias("dragons"),
+   ....:     f.regexp_replace(col('"Name"'), literal("saur"), literal("fleur")).alias("flowers")
+   ....: )
+   ....: 
+Out[13]: 
+DataFrame()
++---------+---------------------------+
+| dragons | flowers                   |
++---------+---------------------------+
+|         | Bulbafleur                |
+|         | Ivyfleur                  |
+|         | Venufleur                 |
+|         | VenufleurMega Venusaur    |
+| [Char]  | Charmander                |
+| [Char]  | Charmeleon                |
+| [Char]  | Charizard                 |
+| [Char]  | CharizardMega Charizard X |
+| [Char]  | CharizardMega Charizard Y |
+|         | Squirtle                  |
++---------+---------------------------+
+Data truncated.
+
+
+
+
+

Casting

+

Casting expressions to different data types using arrow_cast()

+
In [14]: df.select(
+   ....:     f.arrow_cast(col('"Total"'), string_literal("Float64")).alias("total_as_float"),
+   ....:     f.arrow_cast(col('"Total"'), str_lit("Int32")).alias("total_as_int")
+   ....: )
+   ....: 
+Out[14]: 
+DataFrame()
++----------------+--------------+
+| total_as_float | total_as_int |
++----------------+--------------+
+| 318.0          | 318          |
+| 405.0          | 405          |
+| 525.0          | 525          |
+| 625.0          | 625          |
+| 309.0          | 309          |
+| 405.0          | 405          |
+| 534.0          | 534          |
+| 634.0          | 634          |
+| 634.0          | 634          |
+| 314.0          | 314          |
++----------------+--------------+
+Data truncated.
+
+
+
+
+

Other

+

The function in_list() allows you to check a column for the presence of multiple values:

+
In [15]: types = [literal("Grass"), literal("Fire"), literal("Water")]
+
+In [16]: (
+   ....:     df.select(f.in_list(col('"Type 1"'), types, negated=False).alias("basic_types"))
+   ....:       .limit(20)
+   ....:       .to_pandas()
+   ....: )
+   ....: 
+Out[16]: 
+    basic_types
+0          True
+1          True
+2          True
+3          True
+4          True
+5          True
+6          True
+7          True
+8          True
+9          True
+10         True
+11         True
+12         True
+13        False
+14        False
+15        False
+16        False
+17        False
+18        False
+19        False
+
+
+
+
+
+

Handling Missing Values

+

DataFusion provides methods to handle missing values in DataFrames:

+
+

fill_null

+

The fill_null() method replaces NULL values in specified columns with a provided value:

+
# Fill all NULL values with 0 where possible
+df = df.fill_null(0)
+
+# Fill NULL values only in specific string columns
+df = df.fill_null("missing", subset=["name", "category"])
+
+
+

The fill value will be cast to match each column’s type. If casting fails for a column, that column remains unchanged.

+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/index.html b/user-guide/common-operations/index.html new file mode 100644 index 000000000..789adaf29 --- /dev/null +++ b/user-guide/common-operations/index.html @@ -0,0 +1,552 @@ + + + + + + + + Common Operations — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ + + + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/joins.html b/user-guide/common-operations/joins.html new file mode 100644 index 000000000..de8c62fd0 --- /dev/null +++ b/user-guide/common-operations/joins.html @@ -0,0 +1,692 @@ + + + + + + + + Joins — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + + + +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Joins

+

DataFusion supports the following join variants via the method join()

+
    +
  • Inner Join

  • +
  • Left Join

  • +
  • Right Join

  • +
  • Full Join

  • +
  • Left Semi Join

  • +
  • Left Anti Join

  • +
+

For the examples in this section we’ll use the following two DataFrames

+
In [1]: from datafusion import SessionContext
+
+In [2]: ctx = SessionContext()
+
+In [3]: left = ctx.from_pydict(
+   ...:     {
+   ...:         "customer_id": [1, 2, 3],
+   ...:         "customer": ["Alice", "Bob", "Charlie"],
+   ...:     }
+   ...: )
+   ...: 
+
+In [4]: right = ctx.from_pylist([
+   ...:     {"id": 1, "name": "CityCabs"},
+   ...:     {"id": 2, "name": "MetroRide"},
+   ...:     {"id": 5, "name": "UrbanGo"},
+   ...: ])
+   ...: 
+
+
+
+

Inner Join

+

When using an inner join, only rows containing the common values between the two join columns present in both DataFrames +will be included in the resulting DataFrame.

+
In [5]: left.join(right, left_on="customer_id", right_on="id", how="inner")
+Out[5]: 
+DataFrame()
++-------------+----------+----+-----------+
+| customer_id | customer | id | name      |
++-------------+----------+----+-----------+
+| 1           | Alice    | 1  | CityCabs  |
+| 2           | Bob      | 2  | MetroRide |
++-------------+----------+----+-----------+
+
+
+

The parameters left_on and right_on specify the columns from the left DataFrame and right DataFrame that contain the values +that should match.

+
+
+

Left Join

+

A left join combines rows from two DataFrames using the key columns. It returns all rows from the left DataFrame and +matching rows from the right DataFrame. If there’s no match in the right DataFrame, it returns null +values for the corresponding columns.

+
In [6]: left.join(right, left_on="customer_id", right_on="id", how="left")
+Out[6]: 
+DataFrame()
++-------------+----------+----+-----------+
+| customer_id | customer | id | name      |
++-------------+----------+----+-----------+
+| 1           | Alice    | 1  | CityCabs  |
+| 2           | Bob      | 2  | MetroRide |
+| 3           | Charlie  |    |           |
++-------------+----------+----+-----------+
+
+
+
+
+

Full Join

+

A full join merges rows from two tables based on a related column, returning all rows from both tables, even if there +is no match. Unmatched rows will have null values.

+
In [7]: left.join(right, left_on="customer_id", right_on="id", how="full")
+Out[7]: 
+DataFrame()
++-------------+----------+----+-----------+
+| customer_id | customer | id | name      |
++-------------+----------+----+-----------+
+| 1           | Alice    | 1  | CityCabs  |
+| 2           | Bob      | 2  | MetroRide |
+|             |          | 5  | UrbanGo   |
+| 3           | Charlie  |    |           |
++-------------+----------+----+-----------+
+
+
+
+
+

Left Semi Join

+

A left semi join retrieves matching rows from the left table while +omitting duplicates with multiple matches in the right table.

+
In [8]: left.join(right, left_on="customer_id", right_on="id", how="semi")
+Out[8]: 
+DataFrame()
++-------------+----------+
+| customer_id | customer |
++-------------+----------+
+| 1           | Alice    |
+| 2           | Bob      |
++-------------+----------+
+
+
+
+
+

Left Anti Join

+

A left anti join shows all rows from the left table without any matching rows in the right table, +based on the specified matching columns. It excludes rows from the left table that have at least one matching row in +the right table.

+
In [9]: left.join(right, left_on="customer_id", right_on="id", how="anti")
+Out[9]: 
+DataFrame()
++-------------+----------+
+| customer_id | customer |
++-------------+----------+
+| 3           | Charlie  |
++-------------+----------+
+
+
+
+
+

Duplicate Keys

+

It is common to join two DataFrames on a common column name. Starting in +version 51.0.0, datafusion-python will now coalesce columns with identical names by +default. This reduces problems with ambiguous column selection after joins. +You can disable this feature by setting the parameter coalesce_duplicate_keys +to False.

+
In [10]: left = ctx.from_pydict(
+   ....:     {
+   ....:         "id": [1, 2, 3],
+   ....:         "customer": ["Alice", "Bob", "Charlie"],
+   ....:     }
+   ....: )
+   ....: 
+
+In [11]: right = ctx.from_pylist([
+   ....:     {"id": 1, "name": "CityCabs"},
+   ....:     {"id": 2, "name": "MetroRide"},
+   ....:     {"id": 5, "name": "UrbanGo"},
+   ....: ])
+   ....: 
+
+In [12]: left.join(right, "id", how="inner")
+Out[12]: 
+DataFrame()
++----+----------+-----------+
+| id | customer | name      |
++----+----------+-----------+
+| 1  | Alice    | CityCabs  |
+| 2  | Bob      | MetroRide |
++----+----------+-----------+
+
+
+

In contrast to the above example, if we wish to get both columns:

+
In [13]: left.join(right, "id", how="inner", coalesce_duplicate_keys=False)
+Out[13]: 
+DataFrame()
++----+----------+----+-----------+
+| id | customer | id | name      |
++----+----------+----+-----------+
+| 1  | Alice    | 1  | CityCabs  |
+| 2  | Bob      | 2  | MetroRide |
++----+----------+----+-----------+
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/select-and-filter.html b/user-guide/common-operations/select-and-filter.html new file mode 100644 index 000000000..90997c72f --- /dev/null +++ b/user-guide/common-operations/select-and-filter.html @@ -0,0 +1,601 @@ + + + + + + + + Column Selections — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Column Selections

+

Use select() for basic column selection.

+

DataFusion can work with several file types. To start simple, we can use a subset of the +TLC Trip Record Data, +which you can download here.

+
In [1]: from datafusion import SessionContext
+
+In [2]: ctx = SessionContext()
+
+In [3]: df = ctx.read_parquet("yellow_tripdata_2021-01.parquet")
+
+In [4]: df.select("trip_distance", "passenger_count")
+Out[4]: 
+DataFrame()
++---------------+-----------------+
+| trip_distance | passenger_count |
++---------------+-----------------+
+| 2.1           | 1.0             |
+| 0.2           | 1.0             |
+| 14.7          | 1.0             |
+| 10.6          | 0.0             |
+| 4.94          | 1.0             |
+| 1.6           | 1.0             |
+| 4.1           | 1.0             |
+| 5.7           | 1.0             |
+| 9.1           | 1.0             |
+| 2.7           | 2.0             |
++---------------+-----------------+
+Data truncated.
+
+
+

For mathematical or logical operations use col() to select columns, and give meaningful names to the resulting +operations using alias()

+
In [5]: from datafusion import col, lit
+
+In [6]: df.select((col("tip_amount") + col("tolls_amount")).alias("tips_plus_tolls"))
+Out[6]: 
+DataFrame()
++-----------------+
+| tips_plus_tolls |
++-----------------+
+| 0.0             |
+| 0.0             |
+| 8.65            |
+| 6.05            |
+| 4.06            |
+| 2.35            |
+| 0.0             |
+| 0.0             |
+| 0.0             |
+| 3.15            |
++-----------------+
+Data truncated.
+
+
+
+

Warning

+

Please be aware that all identifiers are effectively made lower-case in SQL, so if your file has capital letters +(ex: Name) you must put your column name in double quotes or the selection won’t work. As an alternative for simple +column selection use select() without double quotes.

+
+

For selecting columns with capital letters use '"VendorID"'

+
In [7]: df.select(col('"VendorID"'))
+Out[7]: 
+DataFrame()
++----------+
+| VendorID |
++----------+
+| 1        |
+| 1        |
+| 1        |
+| 1        |
+| 2        |
+| 1        |
+| 1        |
+| 1        |
+| 1        |
+| 1        |
++----------+
+Data truncated.
+
+
+

To combine it with literal values use the lit()

+
In [8]: large_trip_distance = col("trip_distance") > lit(5.0)
+
+In [9]: low_passenger_count = col("passenger_count") < lit(4)
+
+In [10]: df.select((large_trip_distance & low_passenger_count).alias("lonely_trips"))
+Out[10]: 
+DataFrame()
++--------------+
+| lonely_trips |
++--------------+
+| false        |
+| false        |
+| true         |
+| true         |
+| false        |
+| false        |
+| false        |
+| true         |
+| true         |
+| false        |
++--------------+
+Data truncated.
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/udf-and-udfa.html b/user-guide/common-operations/udf-and-udfa.html new file mode 100644 index 000000000..ad7e70103 --- /dev/null +++ b/user-guide/common-operations/udf-and-udfa.html @@ -0,0 +1,829 @@ + + + + + + + + User-Defined Functions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + + + +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

User-Defined Functions

+

DataFusion provides powerful expressions and functions, reducing the need for custom Python +functions. However you can still incorporate your own functions, i.e. User-Defined Functions (UDFs).

+
+

Scalar Functions

+

User-defined functions that operate on a row by row basis are called Scalar +Functions. You can define your own scalar function by calling +udf().

+

The basic definition of a scalar UDF is a python function that takes one or more +pyarrow arrays and returns a single array as +output. DataFusion scalar UDFs operate on an entire batch of records at a time, though the +evaluation of those records should be on a row by row basis. In the following example, we compute +if the input array contains null values.

+
In [1]: import pyarrow
+
+In [2]: import datafusion
+
+In [3]: from datafusion import udf, col
+
+In [4]: def is_null(array: pyarrow.Array) -> pyarrow.Array:
+   ...:     return array.is_null()
+   ...: 
+
+In [5]: is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
+
+In [6]: ctx = datafusion.SessionContext()
+
+In [7]: batch = pyarrow.RecordBatch.from_arrays(
+   ...:     [pyarrow.array([1, None, 3]), pyarrow.array([4, 5, 6])],
+   ...:     names=["a", "b"],
+   ...: )
+   ...: 
+
+In [8]: df = ctx.create_dataframe([[batch]], name="batch_array")
+
+In [9]: df.select(col("a"), is_null_arr(col("a")).alias("is_null")).show()
+DataFrame()
++---+---------+
+| a | is_null |
++---+---------+
+| 1 | false   |
+|   | true    |
+| 3 | false   |
++---+---------+
+
+
+

In the previous example, we used the fact that pyarrow provides a variety of built in array +functions such as is_null(). There are additional pyarrow +compute functions available. When possible, +it is highly recommended to use these functions because they can perform computations without doing +any copy operations from the original arrays. This leads to greatly improved performance.

+

If you need to perform an operation in python that is not available with the pyarrow compute +functions, you will need to convert the record batch into python values, perform your operation, +and construct an array. This operation of converting the built in data type of the array into a +python object can be one of the slowest operations in DataFusion, so it should be done sparingly.

+

The following example performs the same operation as before with is_null but demonstrates +converting to Python objects to do the evaluation.

+
In [10]: import pyarrow
+
+In [11]: import datafusion
+
+In [12]: from datafusion import udf, col
+
+In [13]: def is_null(array: pyarrow.Array) -> pyarrow.Array:
+   ....:     return pyarrow.array([value.as_py() is None for value in array])
+   ....: 
+
+In [14]: is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
+
+In [15]: ctx = datafusion.SessionContext()
+
+In [16]: batch = pyarrow.RecordBatch.from_arrays(
+   ....:     [pyarrow.array([1, None, 3]), pyarrow.array([4, 5, 6])],
+   ....:     names=["a", "b"],
+   ....: )
+   ....: 
+
+In [17]: df = ctx.create_dataframe([[batch]], name="batch_array")
+
+In [18]: df.select(col("a"), is_null_arr(col("a")).alias("is_null")).show()
+DataFrame()
++---+---------+
+| a | is_null |
++---+---------+
+| 1 | false   |
+|   | true    |
+| 3 | false   |
++---+---------+
+
+
+

In this example we passed the PyArrow DataType when we defined the function +by calling udf(). If you need additional control, such as specifying +metadata or nullability of the input or output, you can instead specify a +PyArrow Field.

+

If you need to write a custom function but do not want to incur the performance +cost of converting to Python objects and back, a more advanced approach is to +write Rust based UDFs and to expose them to Python. There is an example in the +DataFusion blog +describing how to do this.

+
+
+

Aggregate Functions

+

The udaf() function allows you to define User-Defined +Aggregate Functions (UDAFs). To use this you must implement an +Accumulator that determines how the aggregation is performed.

+

When defining a UDAF there are four methods you need to implement. The update function takes the +array(s) of input and updates the internal state of the accumulator. You should define this function +to have as many input arguments as you will pass when calling the UDAF. Since aggregation may be +split into multiple batches, we must have a method to combine multiple batches. For this, we have +two functions, state and merge. state will return an array of scalar values that contain +the current state of a single batch accumulation. Then we must merge the results of these +different states. Finally evaluate is the call that will return the final result after the +merge is complete.

+

In the following example we want to define a custom aggregate function that will return the +difference between the sum of two columns. The state can be represented by a single value and we can +also see how the inputs to update and merge differ.

+
import pyarrow as pa
+import pyarrow.compute
+import datafusion
+from datafusion import col, udaf, Accumulator
+from typing import List
+
+class MyAccumulator(Accumulator):
+    """
+    Interface of a user-defined accumulation.
+    """
+    def __init__(self):
+        self._sum = 0.0
+
+    def update(self, values_a: pa.Array, values_b: pa.Array) -> None:
+        self._sum = self._sum + pyarrow.compute.sum(values_a).as_py() - pyarrow.compute.sum(values_b).as_py()
+
+    def merge(self, states: list[pa.Array]) -> None:
+        self._sum = self._sum + pyarrow.compute.sum(states[0]).as_py()
+
+    def state(self) -> list[pa.Scalar]:
+        return [pyarrow.scalar(self._sum)]
+
+    def evaluate(self) -> pa.Scalar:
+        return pyarrow.scalar(self._sum)
+
+ctx = datafusion.SessionContext()
+df = ctx.from_pydict(
+    {
+        "a": [4, 5, 6],
+        "b": [1, 2, 3],
+    }
+)
+
+my_udaf = udaf(MyAccumulator, [pa.float64(), pa.float64()], pa.float64(), [pa.float64()], 'stable')
+
+df.aggregate([], [my_udaf(col("a"), col("b")).alias("col_diff")])
+
+
+
+

FAQ

+

How do I return a list from a UDAF?

+

Both the evaluate and the state functions expect to return scalar values. +If you wish to return a list array as a scalar value, the best practice is to +wrap the values in a pyarrow.Scalar object. For example, you can return a +timestamp list with pa.scalar([...], type=pa.list_(pa.timestamp("ms"))) and +register the appropriate return or state types as +return_type=pa.list_(pa.timestamp("ms")) and +state_type=[pa.list_(pa.timestamp("ms"))], respectively.

+

As of DataFusion 52.0.0, you can return any Python object, including a +PyArrow array, as the return value(s) for these functions and DataFusion will +attempt to create a scalar type from the value. DataFusion has been tested to +convert PyArrow, nanoarrow, and arro3 objects as well as primitive data types +like integers, strings, and so on.

+
+
+
+

Window Functions

+

To implement a User-Defined Window Function (UDWF) you must call the +udwf() function using a class that implements the abstract +class WindowEvaluator.

+

There are three methods of evaluation of UDWFs.

+
    +
  • evaluate is the simplest case, where you are given an array and are expected to calculate the +value for a single row of that array. This is the simplest case, but also the least performant.

  • +
  • evaluate_all computes the values for all rows for an input array at a single time.

  • +
  • evaluate_all_with_rank computes the values for all rows, but you only have the rank +information for the rows.

  • +
+

Which methods you implement are based upon which of these options are set.

+
" - f"{field.name}
" - f"
" - "" - "" - f"{formatted_value}" - f"" - f"
" - f"
{formatted_value}
{value}-high{value}-low{value}-mid]*>(\d+)-low]*>(\d+)-mid]*>(\d+)-high1-low2-low3-mid4-mid6-high8-high{value}{value}{value}{field.name}
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

uses_window_frame

supports_bounded_execution

include_rank

function_to_implement

False (default)

False (default)

False (default)

evaluate_all

False

True

False

evaluate

False

True/False

True

evaluate_all_with_rank

True

True/False

True/False

evaluate

+
+

UDWF options

+

When you define your UDWF you can override the functions that return these values. They will +determine which evaluate functions are called.

+
    +
  • uses_window_frame is set for functions that compute based on the specified window frame. If +your function depends upon the specified frame, set this to True.

  • +
  • supports_bounded_execution specifies if your function can be incrementally computed.

  • +
  • include_rank is set to True for window functions that can be computed only using the rank +information.

  • +
+
import pyarrow as pa
+from datafusion import udwf, col, SessionContext
+from datafusion.user_defined import WindowEvaluator
+
+class ExponentialSmooth(WindowEvaluator):
+    def __init__(self, alpha: float) -> None:
+        self.alpha = alpha
+
+    def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array:
+        results = []
+        curr_value = 0.0
+        values = values[0]
+        for idx in range(num_rows):
+            if idx == 0:
+                curr_value = values[idx].as_py()
+            else:
+                curr_value = values[idx].as_py() * self.alpha + curr_value * (
+                    1.0 - self.alpha
+                )
+            results.append(curr_value)
+
+        return pa.array(results)
+
+exp_smooth = udwf(
+    ExponentialSmooth(0.9),
+    pa.float64(),
+    pa.float64(),
+    volatility="immutable",
+)
+
+ctx = SessionContext()
+
+df = ctx.from_pydict({
+    "a": [1.0, 2.1, 2.9, 4.0, 5.1, 6.0, 6.9, 8.0]
+})
+
+df.select("a", exp_smooth(col("a")).alias("smooth_a")).show()
+
+
+
+ +
+

Table Functions

+

User Defined Table Functions are slightly different than the other functions +described here. These functions take any number of Expr arguments, but only +literal expressions are supported. Table functions must return a Table +Provider as described in the Custom Table Provider page.

+

Once you have a table function, you can register it with the session context +by using datafusion.context.SessionContext.register_udtf().

+

There are examples of both rust backed and python based table functions in the +examples folder of the repository. If you have a rust backed table function +that you wish to expose via PyO3, you need to expose it as a PyCapsule.

+
#[pymethods]
+impl MyTableFunction {
+    fn __datafusion_table_function__<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = cr"datafusion_table_function".into();
+
+        let func = self.clone();
+        let provider = FFI_TableFunction::new(Arc::new(func), None);
+
+        PyCapsule::new(py, provider, Some(name))
+    }
+}
+
+
+
+ + + +
+ + + + + + + + + + + + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/views.html b/user-guide/common-operations/views.html new file mode 100644 index 000000000..31155df55 --- /dev/null +++ b/user-guide/common-operations/views.html @@ -0,0 +1,529 @@ + + + + + + + + Registering Views — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Registering Views

+

You can use the context’s register_view method to register a DataFrame as a view

+
from datafusion import SessionContext, col, literal
+
+# Create a DataFusion context
+ctx = SessionContext()
+
+# Create sample data
+data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}
+
+# Create a DataFrame from the dictionary
+df = ctx.from_pydict(data, "my_table")
+
+# Filter the DataFrame (for example, keep rows where a > 2)
+df_filtered = df.filter(col("a") > literal(2))
+
+# Register the dataframe as a view with the context
+ctx.register_view("view1", df_filtered)
+
+# Now run a SQL query against the registered view
+df_view = ctx.sql("SELECT * FROM view1")
+
+# Collect the results
+results = df_view.collect()
+
+# Convert results to a list of dictionaries for display
+result_dicts = [batch.to_pydict() for batch in results]
+
+print(result_dicts)
+
+
+

This will output:

+
[{'a': [3, 4, 5], 'b': [30, 40, 50]}]
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/common-operations/windows.html b/user-guide/common-operations/windows.html new file mode 100644 index 000000000..2202f7eb1 --- /dev/null +++ b/user-guide/common-operations/windows.html @@ -0,0 +1,835 @@ + + + + + + + + Window Functions — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + + + +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Window Functions

+

In this section you will learn about window functions. A window function utilizes values from one or +multiple rows to produce a result for each individual row, unlike an aggregate function that +provides a single value for multiple rows.

+

The window functions are available in the functions module.

+

We’ll use the pokemon dataset (from Ritchie Vink) in the following examples.

+
In [1]: from datafusion import SessionContext
+
+In [2]: from datafusion import col, lit
+
+In [3]: from datafusion import functions as f
+
+In [4]: ctx = SessionContext()
+
+In [5]: df = ctx.read_csv("pokemon.csv")
+
+
+

Here is an example that shows how you can compare each pokemon’s speed to the speed of the +previous row in the DataFrame.

+
In [6]: df.select(
+   ...:     col('"Name"'),
+   ...:     col('"Speed"'),
+   ...:     f.lag(col('"Speed"')).alias("Previous Speed")
+   ...: )
+   ...: 
+Out[6]: 
+DataFrame()
++---------------------------+-------+----------------+
+| Name                      | Speed | Previous Speed |
++---------------------------+-------+----------------+
+| Bulbasaur                 | 45    |                |
+| Ivysaur                   | 60    | 45             |
+| Venusaur                  | 80    | 60             |
+| VenusaurMega Venusaur     | 80    | 80             |
+| Charmander                | 65    | 80             |
+| Charmeleon                | 80    | 65             |
+| Charizard                 | 100   | 80             |
+| CharizardMega Charizard X | 100   | 100            |
+| CharizardMega Charizard Y | 100   | 100            |
+| Squirtle                  | 43    | 100            |
++---------------------------+-------+----------------+
+Data truncated.
+
+
+
+

Setting Parameters

+
+

Ordering

+

You can control the order in which rows are processed by window functions by providing +a list of order_by functions for the order_by parameter.

+
In [7]: df.select(
+   ...:     col('"Name"'),
+   ...:     col('"Attack"'),
+   ...:     col('"Type 1"'),
+   ...:     f.rank(
+   ...:         partition_by=[col('"Type 1"')],
+   ...:         order_by=[col('"Attack"').sort(ascending=True)],
+   ...:     ).alias("rank"),
+   ...: ).sort(col('"Type 1"'), col('"Attack"'))
+   ...: 
+Out[7]: 
+DataFrame()
++------------+--------+--------+------+
+| Name       | Attack | Type 1 | rank |
++------------+--------+--------+------+
+| Metapod    | 20     | Bug    | 1    |
+| Kakuna     | 25     | Bug    | 2    |
+| Caterpie   | 30     | Bug    | 3    |
+| Weedle     | 35     | Bug    | 4    |
+| Butterfree | 45     | Bug    | 5    |
+| Venonat    | 55     | Bug    | 6    |
+| Venomoth   | 65     | Bug    | 7    |
+| Paras      | 70     | Bug    | 8    |
+| Beedrill   | 90     | Bug    | 9    |
+| Parasect   | 95     | Bug    | 10   |
++------------+--------+--------+------+
+Data truncated.
+
+
+
+
+

Partitions

+

A window function can take a list of partition_by columns similar to an +Aggregation Function. This will cause the window values to be evaluated +independently for each of the partitions. In the example above, we found the rank of each +Pokemon per Type 1 partitions. We can see the first couple of each partition if we do +the following:

+
In [8]: df.select(
+   ...:     col('"Name"'),
+   ...:     col('"Attack"'),
+   ...:     col('"Type 1"'),
+   ...:     f.rank(
+   ...:         partition_by=[col('"Type 1"')],
+   ...:         order_by=[col('"Attack"').sort(ascending=True)],
+   ...:     ).alias("rank"),
+   ...: ).filter(col("rank") < lit(3)).sort(col('"Type 1"'), col("rank"))
+   ...: 
+Out[8]: 
+DataFrame()
++-----------+--------+----------+------+
+| Name      | Attack | Type 1   | rank |
++-----------+--------+----------+------+
+| Metapod   | 20     | Bug      | 1    |
+| Kakuna    | 25     | Bug      | 2    |
+| Dratini   | 64     | Dragon   | 1    |
+| Dragonair | 84     | Dragon   | 2    |
+| Voltorb   | 30     | Electric | 1    |
+| Magnemite | 35     | Electric | 2    |
+| Clefairy  | 45     | Fairy    | 1    |
+| Clefable  | 70     | Fairy    | 2    |
+| Machop    | 80     | Fighting | 1    |
+| Mankey    | 80     | Fighting | 1    |
++-----------+--------+----------+------+
+Data truncated.
+
+
+
+
+

Window Frame

+

When using aggregate functions, the Window Frame defines the rows over which it operates. +If you do not specify a Window Frame, the frame will be set depending on the following +criteria.

+
    +
  • If an order_by clause is set, the default window frame is defined as the rows between +unbounded preceding and the current row.

  • +
  • If an order_by is not set, the default frame is defined as the rows between unbounded +preceding and unbounded following (the entire partition).

  • +
+

Window Frames are defined by three parameters: unit type, starting bound, and ending bound.

+

The unit types available are:

+
    +
  • Rows: The starting and ending boundaries are defined by the number of rows relative to the +current row.

  • +
  • Range: When using Range, the order_by clause must have exactly one term. The boundaries +are defined by how close the rows are to the value of the expression in the order_by +parameter.

  • +
  • Groups: A “group” is the set of all rows that have equivalent values for all terms in the +order_by clause.

  • +
+

In this example we perform a “rolling average” of the speed of the current Pokemon and the +two preceding rows.

+
In [9]: from datafusion.expr import Window, WindowFrame
+
+In [10]: df.select(
+   ....:     col('"Name"'),
+   ....:     col('"Speed"'),
+   ....:     f.avg(col('"Speed"'))
+   ....:     .over(Window(window_frame=WindowFrame("rows", 2, 0), order_by=[col('"Speed"')]))
+   ....:     .alias("Previous Speed"),
+   ....: )
+   ....: 
+Out[10]: 
+DataFrame()
++------------+-------+--------------------+
+| Name       | Speed | Previous Speed     |
++------------+-------+--------------------+
+| Slowpoke   | 15    | 15.0               |
+| Jigglypuff | 20    | 17.5               |
+| Geodude    | 20    | 18.333333333333332 |
+| Paras      | 25    | 21.666666666666668 |
+| Grimer     | 25    | 23.333333333333332 |
+| Rhyhorn    | 25    | 25.0               |
+| Snorlax    | 30    | 26.666666666666668 |
+| Metapod    | 30    | 28.333333333333332 |
+| Oddish     | 30    | 30.0               |
+| Parasect   | 30    | 30.0               |
++------------+-------+--------------------+
+Data truncated.
+
+
+
+
+

Null Treatment

+

When using aggregate functions as window functions, it is often useful to specify how null values +should be treated. In order to do this you need to use the builder function. In future releases +we expect this to be simplified in the interface.

+

One common usage for handling nulls is the case where you want to find the last value up to the +current row. In the following example we demonstrate how setting the null treatment to ignore +nulls will fill in with the value of the most recent non-null row. To do this, we also will set +the window frame so that we only process up to the current row.

+

In this example, we filter down to one specific type of Pokemon that does have some entries in +its Type 2 column that are null.

+
In [11]: from datafusion.common import NullTreatment
+
+In [12]: df.filter(col('"Type 1"') == lit("Bug")).select(
+   ....:     '"Name"',
+   ....:     '"Type 2"',
+   ....:     f.last_value(col('"Type 2"'))
+   ....:     .over(
+   ....:         Window(
+   ....:             window_frame=WindowFrame("rows", None, 0),
+   ....:             order_by=[col('"Speed"')],
+   ....:             null_treatment=NullTreatment.IGNORE_NULLS,
+   ....:         )
+   ....:     )
+   ....:     .alias("last_wo_null"),
+   ....:     f.last_value(col('"Type 2"'))
+   ....:     .over(
+   ....:         Window(
+   ....:             window_frame=WindowFrame("rows", None, 0),
+   ....:             order_by=[col('"Speed"')],
+   ....:             null_treatment=NullTreatment.RESPECT_NULLS,
+   ....:         )
+   ....:     )
+   ....:     .alias("last_with_null"),
+   ....: )
+   ....: 
+Out[12]: 
+DataFrame()
++------------+--------+--------------+----------------+
+| Name       | Type 2 | last_wo_null | last_with_null |
++------------+--------+--------------+----------------+
+| Paras      | Grass  | Grass        | Grass          |
+| Metapod    |        | Grass        |                |
+| Parasect   | Grass  | Grass        | Grass          |
+| Kakuna     | Poison | Poison       | Poison         |
+| Caterpie   |        | Poison       |                |
+| Venonat    | Poison | Poison       | Poison         |
+| Weedle     | Poison | Poison       | Poison         |
+| Butterfree | Flying | Flying       | Flying         |
+| Beedrill   | Poison | Poison       | Poison         |
+| Pinsir     |        | Poison       |                |
++------------+--------+--------------+----------------+
+Data truncated.
+
+
+
+
+
+

Aggregate Functions

+

You can use any Aggregation Function as a window function. Currently +aggregate functions must use the deprecated +datafusion.functions.window() API but this should be resolved in +DataFusion 42.0 (Issue Link). Here +is an example that shows how to compare each pokemon’s attack power with the average attack +power in its "Type 1" using the datafusion.functions.avg() function.

+
In [13]: df.select(
+   ....:     col('"Name"'),
+   ....:     col('"Attack"'),
+   ....:     col('"Type 1"'),
+   ....:     f.window("avg", [col('"Attack"')])
+   ....:         .partition_by(col('"Type 1"'))
+   ....:         .build()
+   ....:         .alias("Average Attack"),
+   ....: )
+   ....: 
+Out[13]: 
+DataFrame()
++-----------------------+--------+--------+-------------------+
+| Name                  | Attack | Type 1 | Average Attack    |
++-----------------------+--------+--------+-------------------+
+| Metapod               | 20     | Bug    | 76.42857142857143 |
+| Butterfree            | 45     | Bug    | 76.42857142857143 |
+| Weedle                | 35     | Bug    | 76.42857142857143 |
+| Kakuna                | 25     | Bug    | 76.42857142857143 |
+| Beedrill              | 90     | Bug    | 76.42857142857143 |
+| BeedrillMega Beedrill | 150    | Bug    | 76.42857142857143 |
+| Paras                 | 70     | Bug    | 76.42857142857143 |
+| Parasect              | 95     | Bug    | 76.42857142857143 |
+| Venonat               | 55     | Bug    | 76.42857142857143 |
+| Venomoth              | 65     | Bug    | 76.42857142857143 |
++-----------------------+--------+--------+-------------------+
+Data truncated.
+
+
+
+
+

Available Functions

+

The possible window functions are:

+
    +
  1. +
    Rank Functions
    +
    +
    +
  2. +
  3. +
    Analytical Functions
    +
    +
    +
  4. +
  5. +
    Aggregate Functions
    +
    +
    +
  6. +
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/configuration.html b/user-guide/configuration.html new file mode 100644 index 000000000..642d665b5 --- /dev/null +++ b/user-guide/configuration.html @@ -0,0 +1,665 @@ + + + + + + + + Configuration — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Configuration

+

Let’s look at how we can configure DataFusion. When creating a SessionContext, you can pass in +a SessionConfig and RuntimeEnvBuilder object. These two cover a wide range of options.

+
from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext
+
+# create a session context with default settings
+ctx = SessionContext()
+print(ctx)
+
+# create a session context with explicit runtime and config settings
+runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000)
+config = (
+    SessionConfig()
+    .with_create_default_catalog_and_schema(True)
+    .with_default_catalog_and_schema("foo", "bar")
+    .with_target_partitions(8)
+    .with_information_schema(True)
+    .with_repartition_joins(False)
+    .with_repartition_aggregations(False)
+    .with_repartition_windows(False)
+    .with_parquet_pruning(False)
+    .set("datafusion.execution.parquet.pushdown_filters", "true")
+)
+ctx = SessionContext(config, runtime)
+print(ctx)
+
+
+
+

Maximizing CPU Usage

+

DataFusion uses partitions to parallelize work. For small queries the +default configuration (number of CPU cores) is often sufficient, but to +fully utilize available hardware you can tune how many partitions are +created and when DataFusion will repartition data automatically.

+

Configure a SessionContext with a higher partition count:

+
from datafusion import SessionConfig, SessionContext
+
+# allow up to 16 concurrent partitions
+config = SessionConfig().with_target_partitions(16)
+ctx = SessionContext(config)
+
+
+

Automatic repartitioning for joins, aggregations, window functions and +other operations can be enabled to increase parallelism:

+
config = (
+    SessionConfig()
+    .with_target_partitions(16)
+    .with_repartition_joins(True)
+    .with_repartition_aggregations(True)
+    .with_repartition_windows(True)
+)
+
+
+

Manual repartitioning is available on DataFrames when you need precise +control:

+
from datafusion import col
+
+df = ctx.read_parquet("data.parquet")
+
+# Evenly divide into 16 partitions
+df = df.repartition(16)
+
+# Or partition by the hash of a column
+df = df.repartition_by_hash(col("a"), num=16)
+
+result = df.collect()
+
+
+
+

Benchmark Example

+

The repository includes a benchmark script that demonstrates how to maximize CPU usage +with DataFusion. The benchmarks/max_cpu_usage.py script shows a practical example +of configuring DataFusion for optimal parallelism.

+

You can run the benchmark script to see the impact of different configuration settings:

+
# Run with default settings (uses all CPU cores)
+python benchmarks/max_cpu_usage.py
+
+# Run with specific number of rows and partitions
+python benchmarks/max_cpu_usage.py --rows 5000000 --partitions 16
+
+# See all available options
+python benchmarks/max_cpu_usage.py --help
+
+
+

Here’s an example showing the performance difference between single and multiple partitions:

+
# Single partition - slower processing
+$ python benchmarks/max_cpu_usage.py --rows=10000000 --partitions 1
+Processed 10000000 rows using 1 partitions in 0.107s
+
+# Multiple partitions - faster processing
+$ python benchmarks/max_cpu_usage.py --rows=10000000 --partitions 10
+Processed 10000000 rows using 10 partitions in 0.038s
+
+
+

This example demonstrates nearly 3x performance improvement (0.107s vs 0.038s) when using +10 partitions instead of 1, showcasing how proper partitioning can significantly improve +CPU utilization and query performance.

+

The script demonstrates several key optimization techniques:

+
    +
  1. Higher target partition count: Uses with_target_partitions() to set the number of concurrent partitions

  2. +
  3. Automatic repartitioning: Enables repartitioning for joins, aggregations, and window functions

  4. +
  5. Manual repartitioning: Uses repartition() to ensure all partitions are utilized

  6. +
  7. CPU-intensive operations: Performs aggregations that can benefit from parallelization

  8. +
+

The benchmark creates synthetic data and measures the time taken to perform a sum aggregation +across the specified number of partitions. This helps you understand how partition configuration +affects performance on your specific hardware.

+
+

Important Considerations

+

The provided benchmark script demonstrates partitioning concepts using synthetic in-memory data +and simple aggregation operations. While useful for understanding basic configuration principles, +actual performance in production environments may vary significantly based on numerous factors:

+

Data Sources and I/O Characteristics:

+
    +
  • Table providers: Performance differs greatly between Parquet files, CSV files, databases, and cloud storage

  • +
  • Storage type: Local SSD, network-attached storage, and cloud storage have vastly different characteristics

  • +
  • Network latency: Remote data sources introduce additional latency considerations

  • +
  • File sizes and distribution: Large files may benefit differently from partitioning than many small files

  • +
+

Query and Workload Characteristics:

+
    +
  • Operation complexity: Simple aggregations versus complex joins, window functions, or nested queries

  • +
  • Data distribution: Skewed data may not partition evenly, affecting parallel efficiency

  • +
  • Memory usage: Large datasets may require different memory management strategies

  • +
  • Concurrent workloads: Multiple queries running simultaneously affect resource allocation

  • +
+

Hardware and Environment Factors:

+
    +
  • CPU architecture: Different processors have varying parallel processing capabilities

  • +
  • Available memory: Limited RAM may require different optimization strategies

  • +
  • System load: Other applications competing for resources affect DataFusion performance

  • +
+

Recommendations for Production Use:

+

To optimize DataFusion for your specific use case, it is strongly recommended to:

+
    +
  1. Create custom benchmarks using your actual data sources, formats, and query patterns

  2. +
  3. Test with representative data volumes that match your production workloads

  4. +
  5. Measure end-to-end performance including data loading, processing, and result handling

  6. +
  7. Evaluate different configuration combinations for your specific hardware and workload

  8. +
  9. Monitor resource utilization (CPU, memory, I/O) to identify bottlenecks in your environment

  10. +
+

This approach will provide more accurate insights into how DataFusion configuration options +will impact your particular applications and infrastructure.

+

For more information about available SessionConfig options, see the Rust DataFusion Configuration guide, +and for RuntimeEnvBuilder options, see the Rust online API documentation.

+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/data-sources.html b/user-guide/data-sources.html new file mode 100644 index 000000000..a2b1c80e5 --- /dev/null +++ b/user-guide/data-sources.html @@ -0,0 +1,832 @@ + + + + + + + + Data Sources — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

Data Sources

+

DataFusion provides a wide variety of ways to get data into a DataFrame to perform operations.

+
+

Local file

+

DataFusion has the ability to read from a variety of popular file formats, such as Parquet, +CSV, JSON, and AVRO.

+
In [1]: from datafusion import SessionContext
+
+In [2]: ctx = SessionContext()
+
+In [3]: df = ctx.read_csv("pokemon.csv")
+
+In [4]: df.show()
+DataFrame()
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| #  | Name                      | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| 1  | Bulbasaur                 | Grass  | Poison | 318   | 45 | 49     | 49      | 65      | 65      | 45    | 1          | false     |
+| 2  | Ivysaur                   | Grass  | Poison | 405   | 60 | 62     | 63      | 80      | 80      | 60    | 1          | false     |
+| 3  | Venusaur                  | Grass  | Poison | 525   | 80 | 82     | 83      | 100     | 100     | 80    | 1          | false     |
+| 3  | VenusaurMega Venusaur     | Grass  | Poison | 625   | 80 | 100    | 123     | 122     | 120     | 80    | 1          | false     |
+| 4  | Charmander                | Fire   |        | 309   | 39 | 52     | 43      | 60      | 50      | 65    | 1          | false     |
+| 5  | Charmeleon                | Fire   |        | 405   | 58 | 64     | 58      | 80      | 65      | 80    | 1          | false     |
+| 6  | Charizard                 | Fire   | Flying | 534   | 78 | 84     | 78      | 109     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard X | Fire   | Dragon | 634   | 78 | 130    | 111     | 130     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard Y | Fire   | Flying | 634   | 78 | 104    | 78      | 159     | 115     | 100   | 1          | false     |
+| 7  | Squirtle                  | Water  |        | 314   | 44 | 48     | 65      | 50      | 64      | 43    | 1          | false     |
+| 8  | Wartortle                 | Water  |        | 405   | 59 | 63     | 80      | 65      | 80      | 58    | 1          | false     |
+| 9  | Blastoise                 | Water  |        | 530   | 79 | 83     | 100     | 85      | 105     | 78    | 1          | false     |
+| 9  | BlastoiseMega Blastoise   | Water  |        | 630   | 79 | 103    | 120     | 135     | 115     | 78    | 1          | false     |
+| 10 | Caterpie                  | Bug    |        | 195   | 45 | 30     | 35      | 20      | 20      | 45    | 1          | false     |
+| 11 | Metapod                   | Bug    |        | 205   | 50 | 20     | 55      | 25      | 25      | 30    | 1          | false     |
+| 12 | Butterfree                | Bug    | Flying | 395   | 60 | 45     | 50      | 90      | 80      | 70    | 1          | false     |
+| 13 | Weedle                    | Bug    | Poison | 195   | 40 | 35     | 30      | 20      | 20      | 50    | 1          | false     |
+| 14 | Kakuna                    | Bug    | Poison | 205   | 45 | 25     | 50      | 25      | 25      | 35    | 1          | false     |
+| 15 | Beedrill                  | Bug    | Poison | 395   | 65 | 90     | 40      | 45      | 80      | 75    | 1          | false     |
+| 15 | BeedrillMega Beedrill     | Bug    | Poison | 495   | 65 | 150    | 40      | 15      | 80      | 145   | 1          | false     |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+
+
+
+
+

Create in-memory

+

Sometimes it can be convenient to create a small DataFrame from a Python list or dictionary object. +To do this in DataFusion, you can use one of the three functions +from_pydict(), +from_pylist(), or +create_dataframe().

+

As their names suggest, from_pydict and from_pylist will create DataFrames from Python +dictionary and list objects, respectively. create_dataframe assumes you will pass in a list +of lists of PyArrow Record Batches.

+

The following three examples all will create identical DataFrames:

+
In [5]: import pyarrow as pa
+
+In [6]: ctx.from_pylist([
+   ...:     { "a": 1, "b": 10.0, "c": "alpha" },
+   ...:     { "a": 2, "b": 20.0, "c": "beta" },
+   ...:     { "a": 3, "b": 30.0, "c": "gamma" },
+   ...: ]).show()
+   ...: 
+DataFrame()
++---+------+-------+
+| a | b    | c     |
++---+------+-------+
+| 1 | 10.0 | alpha |
+| 2 | 20.0 | beta  |
+| 3 | 30.0 | gamma |
++---+------+-------+
+
+In [7]: ctx.from_pydict({
+   ...:     "a": [1, 2, 3],
+   ...:     "b": [10.0, 20.0, 30.0],
+   ...:     "c": ["alpha", "beta", "gamma"],
+   ...: }).show()
+   ...: 
+DataFrame()
++---+------+-------+
+| a | b    | c     |
++---+------+-------+
+| 1 | 10.0 | alpha |
+| 2 | 20.0 | beta  |
+| 3 | 30.0 | gamma |
++---+------+-------+
+
+In [8]: batch = pa.RecordBatch.from_arrays(
+   ...:     [
+   ...:         pa.array([1, 2, 3]),
+   ...:         pa.array([10.0, 20.0, 30.0]),
+   ...:         pa.array(["alpha", "beta", "gamma"]),
+   ...:     ],
+   ...:     names=["a", "b", "c"],
+   ...: )
+   ...: 
+
+In [9]: ctx.create_dataframe([[batch]]).show()
+DataFrame()
++---+------+-------+
+| a | b    | c     |
++---+------+-------+
+| 1 | 10.0 | alpha |
+| 2 | 20.0 | beta  |
+| 3 | 30.0 | gamma |
++---+------+-------+
+
+
+
+
+

Object Store

+

DataFusion has support for multiple storage options in addition to local files. +The example below requires an appropriate S3 account with access credentials.

+

Supported Object Stores are

+ +
from datafusion.object_store import AmazonS3
+
+region = "us-east-1"
+bucket_name = "yellow-trips"
+
+s3 = AmazonS3(
+    bucket_name=bucket_name,
+    region=region,
+    access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+    secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+)
+
+path = f"s3://{bucket_name}/"
+ctx.register_object_store("s3://", s3, None)
+
+ctx.register_parquet("trips", path)
+
+ctx.table("trips").show()
+
+
+
+
+

Other DataFrame Libraries

+

DataFusion can import DataFrames directly from other libraries, such as +Polars and Pandas. +Since DataFusion version 42.0.0, any DataFrame library that supports the Arrow FFI PyCapsule +interface can be imported to DataFusion using the +from_arrow() function. Older versions of Polars may +not support the arrow interface. In those cases, you can still import via the +from_polars() function.

+
import pandas as pd
+
+data = { "a": [1, 2, 3], "b": [10.0, 20.0, 30.0], "c": ["alpha", "beta", "gamma"] }
+pandas_df = pd.DataFrame(data)
+
+datafusion_df = ctx.from_arrow(pandas_df)
+datafusion_df.show()
+
+
+
import polars as pl
+polars_df = pl.DataFrame(data)
+
+datafusion_df = ctx.from_arrow(polars_df)
+datafusion_df.show()
+
+
+
+
+

Delta Lake

+

DataFusion 43.0.0 and later support the ability to register table providers from sources such +as Delta Lake. This will require a recent version of +deltalake to provide the required interfaces.

+
from deltalake import DeltaTable
+
+delta_table = DeltaTable("path_to_table")
+ctx.register_table("my_delta_table", delta_table)
+df = ctx.table("my_delta_table")
+df.show()
+
+
+

On older versions of deltalake (prior to 0.22) you can use the +Arrow DataSet +interface to import to DataFusion, but this does not support features such as filter push down +which can lead to a significant performance difference.

+
from deltalake import DeltaTable
+
+delta_table = DeltaTable("path_to_table")
+ctx.register_dataset("my_delta_table", delta_table.to_pyarrow_dataset())
+df = ctx.table("my_delta_table")
+df.show()
+
+
+
+
+

Apache Iceberg

+

DataFusion 45.0.0 and later support the ability to register Apache Iceberg tables as table providers through the Custom Table Provider interface.

+

This requires either the pyiceberg library (>=0.10.0) or the pyiceberg-core library (>=0.5.0).

+
    +
  • The pyiceberg-core library exposes Iceberg Rust’s implementation of the Custom Table Provider interface as python bindings.

  • +
  • The pyiceberg library utilizes the pyiceberg-core Python bindings under the hood and provides a native way for Python users to interact with DataFusion.

  • +
+
from datafusion import SessionContext
+from pyiceberg.catalog import load_catalog
+import pyarrow as pa
+
+# Load catalog and create/load a table
+catalog = load_catalog("catalog", type="in-memory")
+catalog.create_namespace_if_not_exists("default")
+
+# Create some sample data
+data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]})
+iceberg_table = catalog.create_table("default.test", schema=data.schema)
+iceberg_table.append(data)
+
+# Register the table with DataFusion
+ctx = SessionContext()
+ctx.register_table_provider("test", iceberg_table)
+
+# Query the table using DataFusion
+ctx.table("test").show()
+
+
+

Note that the DataFusion integration relies on features from the Iceberg Rust implementation instead of the PyIceberg implementation. +Features that are available in PyIceberg but not yet in Iceberg Rust will not be available when using DataFusion.

+
+
+

Custom Table Provider

+

You can implement a custom Data Provider in Rust and expose it to DataFusion through the +interface as described in the Custom Table Provider +section. This is an advanced topic, but a +user example +is provided in the DataFusion repository.

+
+
+
+

Catalog

+

A common technique for organizing tables is using a three level hierarchical approach. DataFusion +supports this form of organizing using the Catalog, +Schema, and Table. By default, +a SessionContext comes with a single Catalog and a single Schema +with the names datafusion and default, respectively.

+

The default implementation uses an in-memory approach to the catalog and schema. We have support +for adding additional in-memory catalogs and schemas. This can be done as shown in the following +example:

+
from datafusion.catalog import Catalog, Schema
+
+my_catalog = Catalog.memory_catalog()
+my_schema = Schema.memory_schema()
+
+my_catalog.register_schema("my_schema_name", my_schema)
+
+ctx.register_catalog("my_catalog_name", my_catalog)
+
+
+

You could then register tables in my_schema and access them either through the DataFrame +API or via sql commands such as "SELECT * from my_catalog_name.my_schema_name.my_table".

+
+

User Defined Catalog and Schema

+

If the in-memory catalogs are insufficient for your uses, there are two approaches you can take +to implementing a custom catalog and/or schema. In the below discussion, we describe how to +implement these for a Catalog, but the approach to implementing for a Schema is nearly +identical.

+

DataFusion supports Catalogs written in either Rust or Python. If you write a Catalog in Rust, +you will need to export it as a Python library via PyO3. There is a complete example of a +catalog implemented this way in the +examples folder +of our repository. Writing catalog providers in Rust can typically lead to significant +performance improvements over the Python-based approach.

+

To implement a Catalog in Python, you will need to inherit from the abstract base class +CatalogProvider. There are examples in the +unit tests of +implementing a basic Catalog in Python where we simply keep a dictionary of the +registered Schemas.

+

One important note for developers is that when we have a Catalog defined in Python, we have +two different ways of accessing this Catalog. First, we register the catalog with a Rust +wrapper. This allows any Rust-based code to call the Python functions as necessary. +Second, if the user accesses the Catalog via the Python API, we identify this and return +the original Python object that implements the Catalog. This is an important distinction +for developers because we do not return a Python wrapper around the Rust wrapper of the +original Python object.

+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/dataframe/index.html b/user-guide/dataframe/index.html new file mode 100644 index 000000000..80c598120 --- /dev/null +++ b/user-guide/dataframe/index.html @@ -0,0 +1,872 @@ + + + + + + + + DataFrames — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

DataFrames

+
+

Overview

+

The DataFrame class is the core abstraction in DataFusion that represents tabular data and operations +on that data. DataFrames provide a flexible API for transforming data through various operations such as +filtering, projection, aggregation, joining, and more.

+

A DataFrame represents a logical plan that is lazily evaluated. The actual execution occurs only when +terminal operations like collect(), show(), or to_pandas() are called.

+
+
+

Creating DataFrames

+

DataFrames can be created in several ways:

+
    +
  • From SQL queries via a SessionContext:

    +
    from datafusion import SessionContext
    +
    +ctx = SessionContext()
    +df = ctx.sql("SELECT * FROM your_table")
    +
    +
    +
  • +
  • From registered tables:

    +
    df = ctx.table("your_table")
    +
    +
    +
  • +
  • From various data sources:

    +
    # From CSV files (see :ref:`io_csv` for detailed options)
    +df = ctx.read_csv("path/to/data.csv")
    +
    +# From Parquet files (see :ref:`io_parquet` for detailed options)
    +df = ctx.read_parquet("path/to/data.parquet")
    +
    +# From JSON files (see :ref:`io_json` for detailed options)
    +df = ctx.read_json("path/to/data.json")
    +
    +# From Avro files (see :ref:`io_avro` for detailed options)
    +df = ctx.read_avro("path/to/data.avro")
    +
    +# From Pandas DataFrame
    +import pandas as pd
    +pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    +df = ctx.from_pandas(pandas_df)
    +
    +# From Arrow data
    +import pyarrow as pa
    +batch = pa.RecordBatch.from_arrays(
    +    [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
    +    names=["a", "b"]
    +)
    +df = ctx.from_arrow(batch)
    +
    +
    +
  • +
+

For detailed information about reading from different data sources, see the I/O Guide. +For custom data sources, see Custom Table Provider.

+
+
+

Common DataFrame Operations

+

DataFusion’s DataFrame API offers a wide range of operations:

+
from datafusion import column, literal
+
+# Select specific columns
+df = df.select("col1", "col2")
+
+# Select with expressions
+df = df.select(column("a") + column("b"), column("a") - column("b"))
+
+# Filter rows (expressions or SQL strings)
+df = df.filter(column("age") > literal(25))
+df = df.filter("age > 25")
+
+# Add computed columns
+df = df.with_column("full_name", column("first_name") + literal(" ") + column("last_name"))
+
+# Multiple column additions
+df = df.with_columns(
+    (column("a") + column("b")).alias("sum"),
+    (column("a") * column("b")).alias("product")
+)
+
+# Sort data
+df = df.sort(column("age").sort(ascending=False))
+
+# Join DataFrames
+df = df1.join(df2, on="user_id", how="inner")
+
+# Aggregate data
+from datafusion import functions as f
+df = df.aggregate(
+    [],  # Group by columns (empty for global aggregation)
+    [f.sum(column("amount")).alias("total_amount")]
+)
+
+# Limit rows
+df = df.limit(100)
+
+# Drop columns
+df = df.drop("temporary_column")
+
+
+
+
+

Column Names as Function Arguments

+

Some DataFrame methods accept column names when an argument refers to an +existing column. These include:

+
    +
  • select()

  • +
  • sort()

  • +
  • drop()

  • +
  • join() (on argument)

  • +
  • aggregate() (grouping columns)

  • +
+

See the full function documentation for details on any specific function.

+

Note that join_on() expects col()/column() expressions rather than plain strings.

+

For such methods, you can pass column names directly:

+
from datafusion import col, functions as f
+
+df.sort('id')
+df.aggregate('id', [f.count(col('value'))])
+
+
+

The same operation can also be written with explicit column expressions, using either col() or column():

+
from datafusion import col, column, functions as f
+
+df.sort(col('id'))
+df.aggregate(column('id'), [f.count(col('value'))])
+
+
+

Note that column() is an alias of col(), so you can use either name; the example above shows both in action.

+

Whenever an argument represents an expression—such as in +filter() or +with_column()—use col() to reference +columns. The comparison and arithmetic operators on Expr will automatically +convert any non-Expr value into a literal expression, so writing

+
from datafusion import col
+df.filter(col("age") > 21)
+
+
+

is equivalent to using lit(21) explicitly. Use lit() (also available +as literal()) when you need to construct a literal expression directly.

+
+
+

Terminal Operations

+

To materialize the results of your DataFrame operations:

+
# Collect all data as PyArrow RecordBatches
+result_batches = df.collect()
+
+# Convert to various formats
+pandas_df = df.to_pandas()        # Pandas DataFrame
+polars_df = df.to_polars()        # Polars DataFrame
+arrow_table = df.to_arrow_table() # PyArrow Table
+py_dict = df.to_pydict()          # Python dictionary
+py_list = df.to_pylist()          # Python list of dictionaries
+
+# Display results
+df.show()                         # Print tabular format to console
+
+# Count rows
+count = df.count()
+
+# Collect a single column of data as a PyArrow Array
+arr = df.collect_column("age")
+
+
+
+
+

Zero-copy streaming to Arrow-based Python libraries

+

DataFusion DataFrames implement the __arrow_c_stream__ protocol, enabling +zero-copy, lazy streaming into Arrow-based Python libraries. With the streaming +protocol, batches are produced on demand.

+
+

Note

+

The protocol is implementation-agnostic and works with any Python library +that understands the Arrow C streaming interface (for example, PyArrow +or other Arrow-compatible implementations). The sections below provide a +short PyArrow-specific example and general guidance for other +implementations.

+
+
+
+

PyArrow

+
import pyarrow as pa
+
+# Create a PyArrow RecordBatchReader without materializing all batches
+reader = pa.RecordBatchReader.from_stream(df)
+for batch in reader:
+    ...  # process each batch as it is produced
+
+
+

DataFrames are also iterable, yielding datafusion.RecordBatch +objects lazily so you can loop over results directly without importing +PyArrow:

+
for batch in df:
+    ...  # each batch is a ``datafusion.RecordBatch``
+
+
+

Each batch exposes to_pyarrow(), allowing conversion to a PyArrow +table. pa.table(df) collects the entire DataFrame eagerly into a +PyArrow table:

+
import pyarrow as pa
+table = pa.table(df)
+
+
+

Asynchronous iteration is supported as well, allowing integration with +asyncio event loops:

+
async for batch in df:
+    ...  # process each batch as it is produced
+
+
+

To work with the stream directly, use execute_stream(), which returns a +RecordBatchStream.

+
stream = df.execute_stream()
+for batch in stream:
+    ...
+
+
+
+

Execute as Stream

+

For finer control over streaming execution, use +execute_stream() to obtain a +datafusion.RecordBatchStream:

+
stream = df.execute_stream()
+for batch in stream:
+    ...  # process each batch as it is produced
+
+
+
+

Tip

+

To get a PyArrow reader instead, call

+

pa.RecordBatchReader.from_stream(df).

+
+

When partition boundaries are important, +execute_stream_partitioned() +returns an iterable of datafusion.RecordBatchStream objects, one per +partition:

+
for stream in df.execute_stream_partitioned():
+    for batch in stream:
+        ...  # each stream yields RecordBatches
+
+
+

To process partitions concurrently, first collect the streams into a list +and then poll each one in a separate asyncio task:

+
import asyncio
+
+async def consume(stream):
+    async for batch in stream:
+        ...
+
+streams = list(df.execute_stream_partitioned())
+await asyncio.gather(*(consume(s) for s in streams))
+
+
+

See Arrow for additional details on the Arrow interface.

+
+
+
+

HTML Rendering

+

When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will +automatically display as formatted HTML tables. For detailed information about customizing HTML +rendering, formatting options, and advanced styling, see HTML Rendering in Jupyter.

+
+
+

Core Classes

+
+
DataFrame

The main DataFrame class for building and executing queries.

+

See: datafusion.DataFrame

+
+
SessionContext

The primary entry point for creating DataFrames from various data sources.

+

Key methods for DataFrame creation:

+
    +
  • read_csv() - Read CSV files

  • +
  • read_parquet() - Read Parquet files

  • +
  • read_json() - Read JSON files

  • +
  • read_avro() - Read Avro files

  • +
  • table() - Access registered tables

  • +
  • sql() - Execute SQL queries

  • +
  • from_pandas() - Create from Pandas DataFrame

  • +
  • from_arrow() - Create from Arrow data

  • +
+

See: datafusion.SessionContext

+
+
+
+
+

Expression Classes

+
+
Expr

Represents expressions that can be used in DataFrame operations.

+

See: datafusion.Expr

+
+
+

Functions for creating expressions:

+ +
+
+

Built-in Functions

+

DataFusion provides many built-in functions for data manipulation:

+ +

For a complete list of available functions, see the datafusion.functions module documentation.

+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/dataframe/rendering.html b/user-guide/dataframe/rendering.html new file mode 100644 index 000000000..91bac2c59 --- /dev/null +++ b/user-guide/dataframe/rendering.html @@ -0,0 +1,736 @@ + + + + + + + + HTML Rendering in Jupyter — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + + + + + + + + +
+ +
+ +
+

HTML Rendering in Jupyter

+

When working in Jupyter notebooks or other environments that support rich HTML display, +DataFusion DataFrames automatically render as nicely formatted HTML tables. This functionality +is provided by the _repr_html_ method, which is automatically called by Jupyter to provide +a richer visualization than plain text output.

+
+

Basic HTML Rendering

+

In a Jupyter environment, simply displaying a DataFrame object will trigger HTML rendering:

+
# Will display as HTML table in Jupyter
+df
+
+# Explicit display also uses HTML rendering
+display(df)
+
+
+
+
+

Customizing HTML Rendering

+

DataFusion provides extensive customization options for HTML table rendering through the +datafusion.html_formatter module.

+
+

Configuring the HTML Formatter

+

You can customize how DataFrames are rendered by configuring the formatter:

+
from datafusion.html_formatter import configure_formatter
+
+# Change the default styling
+configure_formatter(
+    max_cell_length=25,        # Maximum characters in a cell before truncation
+    max_width=1000,            # Maximum width in pixels
+    max_height=300,            # Maximum height in pixels
+    max_memory_bytes=2097152,  # Maximum memory for rendering (2MB)
+    min_rows=10,               # Minimum number of rows to display
+    max_rows=10,               # Maximum rows to display in __repr__
+    enable_cell_expansion=True,# Allow expanding truncated cells
+    custom_css=None,           # Additional custom CSS
+    show_truncation_message=True, # Show message when data is truncated
+    style_provider=None,       # Custom styling provider
+    use_shared_styles=True     # Share styles across tables
+)
+
+
+

The formatter settings affect all DataFrames displayed after configuration.

+
+
+
+

Custom Style Providers

+

For advanced styling needs, you can create a custom style provider:

+
from datafusion.html_formatter import StyleProvider, configure_formatter
+
+class MyStyleProvider(StyleProvider):
+    def get_table_styles(self):
+        return {
+            "table": "border-collapse: collapse; width: 100%;",
+            "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;",
+            "td": "border: 1px solid #ddd; padding: 8px;",
+            "tr:nth-child(even)": "background-color: #f2f2f2;",
+        }
+
+    def get_value_styles(self, dtype, value):
+        """Return custom styles for specific values"""
+        if dtype == "float" and value < 0:
+            return "color: red;"
+        return None
+
+# Apply the custom style provider
+configure_formatter(style_provider=MyStyleProvider())
+
+
+
+
+

Performance Optimization with Shared Styles

+

The use_shared_styles parameter (enabled by default) optimizes performance when displaying +multiple DataFrames in notebook environments:

+
from datafusion.html_formatter import StyleProvider, configure_formatter
+# Default: Use shared styles (recommended for notebooks)
+configure_formatter(use_shared_styles=True)
+
+# Disable shared styles (each DataFrame includes its own styles)
+configure_formatter(use_shared_styles=False)
+
+
+

When use_shared_styles=True: +- CSS styles and JavaScript are included only once per notebook session +- This reduces HTML output size and prevents style duplication +- Improves rendering performance with many DataFrames +- Applies consistent styling across all DataFrames

+
+
+

Creating a Custom Formatter

+

For complete control over rendering, you can implement a custom formatter:

+
from datafusion.html_formatter import Formatter, get_formatter
+
+class MyFormatter(Formatter):
+    def format_html(self, batches, schema, has_more=False, table_uuid=None):
+        # Create your custom HTML here
+        html = "<div class='my-custom-table'>"
+        # ... formatting logic ...
+        html += "</div>"
+        return html
+
+# Set as the global formatter
+configure_formatter(formatter_class=MyFormatter)
+
+# Or use the formatter just for specific operations
+formatter = get_formatter()
+custom_html = formatter.format_html(batches, schema)
+
+
+
+
+

Managing Formatters

+

Reset to default formatting:

+
from datafusion.html_formatter import reset_formatter
+
+# Reset to default settings
+reset_formatter()
+
+
+

Get the current formatter settings:

+
from datafusion.html_formatter import get_formatter
+
+formatter = get_formatter()
+print(formatter.max_rows)
+print(formatter.theme)
+
+
+
+
+

Contextual Formatting

+

You can also use a context manager to temporarily change formatting settings:

+
from datafusion.html_formatter import formatting_context
+
+# Default formatting
+df.show()
+
+# Temporarily use different formatting
+with formatting_context(max_rows=100, theme="dark"):
+    df.show()  # Will use the temporary settings
+
+# Back to default formatting
+df.show()
+
+
+
+
+

Memory and Display Controls

+

You can control how much data is displayed and how much memory is used for rendering:

+
configure_formatter(
+    max_memory_bytes=4 * 1024 * 1024,  # 4MB maximum memory for display
+    min_rows=20,                       # Always show at least 20 rows
+    max_rows=50                        # Show up to 50 rows in output
+)
+
+
+

These parameters help balance comprehensive data display against performance considerations.

+
+
+

Best Practices

+
    +
  1. Global Configuration: Use configure_formatter() at the beginning of your notebook to set up consistent formatting for all DataFrames.

  2. +
  3. Memory Management: Set appropriate max_memory_bytes limits to prevent performance issues with large datasets.

  4. +
  5. Shared Styles: Keep use_shared_styles=True (default) for better performance in notebooks with multiple DataFrames.

  6. +
  7. Reset When Needed: Call reset_formatter() when you want to start fresh with default settings.

  8. +
  9. Cell Expansion: Use enable_cell_expansion=True when cells might contain longer content that users may want to see in full.

  10. +
+
+
+

Additional Resources

+ +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/introduction.html b/user-guide/introduction.html new file mode 100644 index 000000000..00a760dde --- /dev/null +++ b/user-guide/introduction.html @@ -0,0 +1,574 @@ + + + + + + + + Introduction — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Introduction

+

Welcome to the User Guide for the Python bindings of Arrow DataFusion. This guide aims to provide an introduction to +DataFusion through various examples and highlight the most effective ways of using it.

+
+

Installation

+

DataFusion is a Python library and, as such, can be installed via pip from PyPI.

+
pip install datafusion
+
+
+

You can verify the installation by running:

+
In [1]: import datafusion
+
+In [2]: datafusion.__version__
+Out[2]: '52.3.0'
+
+
+

In this documentation we will also show some examples of how DataFusion integrates +with Jupyter notebooks. To install and start a JupyterLab session, use:

+
pip install jupyterlab
+jupyter lab
+
+
+

To demonstrate working with DataFusion, we need a data source. Later in the tutorial we will show +options for data sources. For our first example, we demonstrate using a Pokemon dataset that you +can download +here.

+

With that file in place you can use the following python example to view the DataFrame in +DataFusion.

+
In [3]: from datafusion import SessionContext
+
+In [4]: ctx = SessionContext()
+
+In [5]: df = ctx.read_csv("pokemon.csv")
+
+In [6]: df.show()
+DataFrame()
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| #  | Name                      | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+| 1  | Bulbasaur                 | Grass  | Poison | 318   | 45 | 49     | 49      | 65      | 65      | 45    | 1          | false     |
+| 2  | Ivysaur                   | Grass  | Poison | 405   | 60 | 62     | 63      | 80      | 80      | 60    | 1          | false     |
+| 3  | Venusaur                  | Grass  | Poison | 525   | 80 | 82     | 83      | 100     | 100     | 80    | 1          | false     |
+| 3  | VenusaurMega Venusaur     | Grass  | Poison | 625   | 80 | 100    | 123     | 122     | 120     | 80    | 1          | false     |
+| 4  | Charmander                | Fire   |        | 309   | 39 | 52     | 43      | 60      | 50      | 65    | 1          | false     |
+| 5  | Charmeleon                | Fire   |        | 405   | 58 | 64     | 58      | 80      | 65      | 80    | 1          | false     |
+| 6  | Charizard                 | Fire   | Flying | 534   | 78 | 84     | 78      | 109     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard X | Fire   | Dragon | 634   | 78 | 130    | 111     | 130     | 85      | 100   | 1          | false     |
+| 6  | CharizardMega Charizard Y | Fire   | Flying | 634   | 78 | 104    | 78      | 159     | 115     | 100   | 1          | false     |
+| 7  | Squirtle                  | Water  |        | 314   | 44 | 48     | 65      | 50      | 64      | 43    | 1          | false     |
+| 8  | Wartortle                 | Water  |        | 405   | 59 | 63     | 80      | 65      | 80      | 58    | 1          | false     |
+| 9  | Blastoise                 | Water  |        | 530   | 79 | 83     | 100     | 85      | 105     | 78    | 1          | false     |
+| 9  | BlastoiseMega Blastoise   | Water  |        | 630   | 79 | 103    | 120     | 135     | 115     | 78    | 1          | false     |
+| 10 | Caterpie                  | Bug    |        | 195   | 45 | 30     | 35      | 20      | 20      | 45    | 1          | false     |
+| 11 | Metapod                   | Bug    |        | 205   | 50 | 20     | 55      | 25      | 25      | 30    | 1          | false     |
+| 12 | Butterfree                | Bug    | Flying | 395   | 60 | 45     | 50      | 90      | 80      | 70    | 1          | false     |
+| 13 | Weedle                    | Bug    | Poison | 195   | 40 | 35     | 30      | 20      | 20      | 50    | 1          | false     |
+| 14 | Kakuna                    | Bug    | Poison | 205   | 45 | 25     | 50      | 25      | 25      | 35    | 1          | false     |
+| 15 | Beedrill                  | Bug    | Poison | 395   | 65 | 90     | 40      | 45      | 80      | 75    | 1          | false     |
+| 15 | BeedrillMega Beedrill     | Bug    | Poison | 495   | 65 | 150    | 40      | 15      | 80      | 145   | 1          | false     |
++----+---------------------------+--------+--------+-------+----+--------+---------+---------+---------+-------+------------+-----------+
+
+
+

If you are working in a Jupyter notebook, you can also use the following to give you a table +display that may be easier to read.

+
display(df)
+
+
+Rendered table showing Pokemon DataFrame + +
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/arrow.html b/user-guide/io/arrow.html new file mode 100644 index 000000000..748fdbb54 --- /dev/null +++ b/user-guide/io/arrow.html @@ -0,0 +1,578 @@ + + + + + + + + Arrow — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Arrow

+

DataFusion implements the +Apache Arrow PyCapsule interface +for importing and exporting DataFrames with zero copy. With this feature, any Python +project that implements this interface can share data back and forth with DataFusion +with zero copy.

+

We can demonstrate using pyarrow.

+
+

Importing to DataFusion

+

Here we will create an Arrow table and import it to DataFusion.

+

To import an Arrow table, use datafusion.context.SessionContext.from_arrow(). +This will accept any Python object that implements +__arrow_c_stream__ +or __arrow_c_array__ +and returns a StructArray. Common pyarrow sources you can use are:

+ +
In [1]: from datafusion import SessionContext
+
+In [2]: import pyarrow as pa
+
+In [3]: data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
+In [4]: table = pa.Table.from_pydict(data)
+
+In [5]: ctx = SessionContext()
+
+In [6]: df = ctx.from_arrow(table)
+
+In [7]: df
+Out[7]: 
+DataFrame()
++---+---+
+| a | b |
++---+---+
+| 1 | 4 |
+| 2 | 5 |
+| 3 | 6 |
++---+---+
+
+
+
+
+

Exporting from DataFusion

+

DataFusion DataFrames implement the __arrow_c_stream__ PyCapsule interface, so any +Python library that accepts such objects can import a DataFusion DataFrame directly.

+

Invoking __arrow_c_stream__ triggers execution of the underlying query, but +batches are yielded incrementally rather than materialized all at once in memory. +Consumers can process the stream as it arrives. The stream executes lazily, +letting downstream readers pull batches on demand.

+
In [8]: from datafusion import col, lit
+
+In [9]: df = df.select((col("a") * lit(1.5)).alias("c"), lit("df").alias("d"))
+
+In [10]: pa.table(df)
+Out[10]: 
+pyarrow.Table
+c: double
+d: string_view not null
+----
+c: [[1.5,3,4.5]]
+d: [["df","df","df"]]
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/avro.html b/user-guide/io/avro.html new file mode 100644 index 000000000..bf41d6c8a --- /dev/null +++ b/user-guide/io/avro.html @@ -0,0 +1,503 @@ + + + + + + + + Avro — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Avro

+

Avro is a serialization format for record data. Reading an Avro file is very straightforward +with read_avro():

+
from datafusion import SessionContext
+
+ctx = SessionContext()
+df = ctx.read_avro("file.avro")
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/csv.html b/user-guide/io/csv.html new file mode 100644 index 000000000..5b1933d99 --- /dev/null +++ b/user-guide/io/csv.html @@ -0,0 +1,526 @@ + + + + + + + + CSV — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

CSV

+

Reading a CSV file is very straightforward with read_csv():

+
from datafusion import SessionContext
+
+ctx = SessionContext()
+df = ctx.read_csv("file.csv")
+
+
+

An alternative is to use register_csv()

+
ctx.register_csv("file", "file.csv")
+df = ctx.table("file")
+
+
+

If you require additional control over how to read the CSV file, you can use +CsvReadOptions to set a variety of options.

+
from datafusion import CsvReadOptions
+options = (
+    CsvReadOptions()
+    .with_has_header(True) # File contains a header row
+    .with_delimiter(";") # Use ; as the delimiter instead of ,
+    .with_comment("#")  # Skip lines starting with #
+    .with_escape("\\")  # Escape character
+    .with_null_regex(r"^(null|NULL|N/A)$")  # Treat these as NULL
+    .with_truncated_rows(True) # Allow rows to have incomplete columns
+    .with_file_compression_type("gzip")  # Read gzipped CSV
+    .with_file_extension(".gz") # File extension other than .csv
+)
+df = ctx.read_csv("data.csv.gz", options=options)
+
+
+

Details for all CSV reading options can be found on the +DataFusion documentation site.

+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/index.html b/user-guide/io/index.html new file mode 100644 index 000000000..5dd714ddf --- /dev/null +++ b/user-guide/io/index.html @@ -0,0 +1,509 @@ + + + + + + + + IO — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ + + + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/json.html b/user-guide/io/json.html new file mode 100644 index 000000000..100317ffa --- /dev/null +++ b/user-guide/io/json.html @@ -0,0 +1,503 @@ + + + + + + + + JSON — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

JSON

+

JSON (JavaScript Object Notation) is a lightweight data-interchange format. +Reading a JSON file with read_json() is simple and easy:

+
from datafusion import SessionContext
+
+ctx = SessionContext()
+df = ctx.read_json("file.json")
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/parquet.html b/user-guide/io/parquet.html new file mode 100644 index 000000000..6039c851e --- /dev/null +++ b/user-guide/io/parquet.html @@ -0,0 +1,507 @@ + + + + + + + + Parquet — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Parquet

+

It is quite simple to read a parquet file using the read_parquet() function.

+
from datafusion import SessionContext
+
+ctx = SessionContext()
+df = ctx.read_parquet("file.parquet")
+
+
+

An alternative is to use register_parquet()

+
ctx.register_parquet("file", "file.parquet")
+df = ctx.table("file")
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/io/table_provider.html b/user-guide/io/table_provider.html new file mode 100644 index 000000000..875d160a8 --- /dev/null +++ b/user-guide/io/table_provider.html @@ -0,0 +1,531 @@ + + + + + + + + Custom Table Provider — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ + + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Custom Table Provider

+

If you have a custom data source that you want to integrate with DataFusion, you can do so by +implementing the TableProvider +interface in Rust and then exposing it in Python. To do so, +you must use DataFusion 43.0.0 or later and expose an FFI_TableProvider +via PyCapsule.

+

A complete example can be found in the examples folder.

+
#[pymethods]
+impl MyTableProvider {
+
+    fn __datafusion_table_provider__<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = cr"datafusion_table_provider".into();
+
+        let provider = Arc::new(self.clone());
+        let provider = FFI_TableProvider::new(provider, false, None);
+
+        PyCapsule::new_bound(py, provider, Some(name.clone()))
+    }
+}
+
+
+

Once you have this library available, you can construct a +Table in Python and register it with the +SessionContext.

+
from datafusion import SessionContext, Table
+
+ctx = SessionContext()
+provider = MyTableProvider()
+
+ctx.register_table("capsule_table", provider)
+
+ctx.table("capsule_table").show()
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/sql.html b/user-guide/sql.html new file mode 100644 index 000000000..0eeb3a249 --- /dev/null +++ b/user-guide/sql.html @@ -0,0 +1,646 @@ + + + + + + + + SQL — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

SQL

+

DataFusion also offers a SQL API; read the full reference here.

+
In [1]: import datafusion
+
+In [2]: from datafusion import DataFrame, SessionContext
+
+# create a context
+In [3]: ctx = datafusion.SessionContext()
+
+# register a CSV
+In [4]: ctx.register_csv("pokemon", "pokemon.csv")
+
+# create a new statement via SQL
+In [5]: df = ctx.sql('SELECT "Attack"+"Defense", "Attack"-"Defense" FROM pokemon')
+
+# collect and convert to pandas DataFrame
+In [6]: df.to_pandas()
+Out[6]: 
+     pokemon.Attack + pokemon.Defense  pokemon.Attack - pokemon.Defense
+0                                  98                                 0
+1                                 125                                -1
+2                                 165                                -1
+3                                 223                               -23
+4                                  95                                 9
+..                                ...                               ...
+158                               190                                10
+159                               109                                19
+160                               149                                19
+161                               229                                39
+162                               200                                20
+
+[163 rows x 2 columns]
+
+
+
+

Parameterized queries

+

In DataFusion-Python 51.0.0 we introduced the ability to pass parameters +in a SQL query. These are similar in concept to +prepared statements, +but allow passing named parameters into a SQL query. Consider this simple +example.

+
In [7]: def show_attacks(ctx: SessionContext, threshold: int) -> None:
+   ...:     ctx.sql(
+   ...:         'SELECT "Name", "Attack" FROM pokemon WHERE "Attack" > $val', val=threshold
+   ...:     ).show(num=5)
+   ...: show_attacks(ctx, 75)
+   ...: 
+DataFrame()
++---------------------------+--------+
+| Name                      | Attack |
++---------------------------+--------+
+| Venusaur                  | 82     |
+| VenusaurMega Venusaur     | 100    |
+| Charizard                 | 84     |
+| CharizardMega Charizard X | 130    |
+| CharizardMega Charizard Y | 104    |
++---------------------------+--------+
+
+
+

When passing parameters like the example above we convert the Python objects +into their string representation. We also have special case handling +for DataFrame objects, since they cannot simply +be turned into string representations for an SQL query. In these cases we +will register a temporary view in the SessionContext +using a generated table name.

+

The formatting for passing string replacement objects is to precede the +variable name with a single $. This works for all dialects in +the SQL parser except hive and mysql. Since these dialects do not +support named placeholders, we are unable to do this type of replacement. +We recommend either switching to another dialect or using Python +f-string style replacement.

+
+

Warning

+

To support DataFrame parameterized queries, your session must support +registration of temporary views. The default +CatalogProvider and +SchemaProvider do have this capability. +If you have implemented custom providers, it is important that temporary +views do not persist across SessionContext +instances, or you may get unintended consequences.

+
+

The following example shows passing in both a DataFrame +object as well as a Python object to be used in parameterized replacement.

+
In [8]: def show_column(
+   ...:     ctx: SessionContext, column: str, df: DataFrame, threshold: int
+   ...: ) -> None:
+   ...:     ctx.sql(
+   ...:         'SELECT "Name", $col FROM $df WHERE $col > $val',
+   ...:         col=column,
+   ...:         df=df,
+   ...:         val=threshold,
+   ...:     ).show(num=5)
+   ...: df = ctx.table("pokemon")
+   ...: show_column(ctx, '"Defense"', df, 75)
+   ...: 
+DataFrame()
++---------------------------+---------+
+| Name                      | Defense |
++---------------------------+---------+
+| Venusaur                  | 83      |
+| VenusaurMega Venusaur     | 123     |
+| Charizard                 | 78      |
+| CharizardMega Charizard X | 111     |
+| CharizardMega Charizard Y | 78      |
++---------------------------+---------+
+
+
+

The approach implemented for conversion of variables into a SQL query +relies on string conversion. This has the potential for data loss, +specifically for cases like floating point numbers. If you need to pass +variables into a parameterized query and it is important to maintain the +original value without conversion to a string, then you can use the +optional parameter param_values to specify these. This parameter +expects a dictionary mapping from the parameter name to a Python +object. Those objects will be cast into a +PyArrow Scalar Value.

+

Using param_values will rely on the SQL dialect you have configured +for your session. This can be set using the configuration options +of your SessionContext. Similar to how +prepared statements +work, these parameters are limited to places where you would pass in a +scalar value, such as a comparison.

+
In [9]: def param_attacks(ctx: SessionContext, threshold: int) -> None:
+   ...:     ctx.sql(
+   ...:         'SELECT "Name", "Attack" FROM pokemon WHERE "Attack" > $val',
+   ...:         param_values={"val": threshold},
+   ...:     ).show(num=5)
+   ...: param_attacks(ctx, 75)
+   ...: 
+DataFrame()
++---------------------------+--------+
+| Name                      | Attack |
++---------------------------+--------+
+| Venusaur                  | 82     |
+| VenusaurMega Venusaur     | 100    |
+| Charizard                 | 84     |
+| CharizardMega Charizard X | 130    |
+| CharizardMega Charizard Y | 104    |
++---------------------------+--------+
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/user-guide/upgrade-guides.html b/user-guide/upgrade-guides.html new file mode 100644 index 000000000..97b92fc4a --- /dev/null +++ b/user-guide/upgrade-guides.html @@ -0,0 +1,571 @@ + + + + + + + + Upgrade Guides — Apache Arrow DataFusion documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ + +
+ + + + +
+ + +
+ +
+ On this page +
+ + +
+ +
+ +
+ + +
+ + + + + + +
+ +
+ +
+

Upgrade Guides

+
+

DataFusion 52.0.0

+

This version includes a major update to the Python Extensions due to upgrades +to the Foreign Function Interface. +Users who contribute their own CatalogProvider, SchemaProvider, +TableProvider or TableFunction via FFI must now provide access to a +LogicalExtensionCodec and a TaskContextProvider. The function signatures +for the methods to get these PyCapsule objects now require an additional +parameter, which is a Python object that can be used to extract the +FFI_LogicalExtensionCodec that is necessary.

+

A complete example can be found in the FFI example. +Your methods need to be updated to take an additional parameter like in this +example.

+
#[pymethods]
+impl MyCatalogProvider {
+    pub fn __datafusion_catalog_provider__<'py>(
+        &self,
+        py: Python<'py>,
+        session: Bound<PyAny>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = cr"datafusion_catalog_provider".into();
+
+        let provider = Arc::clone(&self.inner) as Arc<dyn CatalogProvider + Send>;
+
+        let codec = ffi_logical_codec_from_pycapsule(session)?;
+        let provider = FFI_CatalogProvider::new_with_ffi_codec(provider, None, codec);
+
+        PyCapsule::new(py, provider, Some(name))
+    }
+}
+
+
+

To extract the logical extension codec FFI object from the provided object you +can implement a helper method such as:

+
pub(crate) fn ffi_logical_codec_from_pycapsule(
+    obj: Bound<PyAny>,
+) -> PyResult<FFI_LogicalExtensionCodec> {
+    let attr_name = "__datafusion_logical_extension_codec__";
+    let capsule = if obj.hasattr(attr_name)? {
+        obj.getattr(attr_name)?.call0()?
+    } else {
+        obj
+    };
+
+    let capsule = capsule.downcast::<PyCapsule>()?;
+    validate_pycapsule(capsule, "datafusion_logical_extension_codec")?;
+
+    let codec = unsafe { capsule.reference::<FFI_LogicalExtensionCodec>() };
+
+    Ok(codec.clone())
+}
+
+
+

With these updates, the DataFusion FFI interface no longer depends directly on the +datafusion core crate. You can improve your build times and potentially +reduce your library binary size by removing this dependency and instead +using the specific datafusion project crates.

+

For example, where you previously had imports like:

+
use datafusion::catalog::MemTable;
+
+
+

Instead you can now write:

+
use datafusion_catalog::MemTable;
+
+
+
+
+ + +
+ + + + + +
+ + +
+
+ + + + + + + + + \ No newline at end of file diff --git a/uv.lock b/uv.lock deleted file mode 100644 index f4926521b..000000000 --- a/uv.lock +++ /dev/null @@ -1,1671 +0,0 @@ -version = 1 -revision = 3 -requires-python = ">=3.10" -resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version >= '3.12' and python_full_version < '3.14'", - "python_full_version == '3.11.*'", - "python_full_version < '3.11'", -] - -[[package]] -name = "alabaster" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, -] - -[[package]] -name = "arro3-core" -version = "0.6.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2a/01/f06342d2eb822153f63d188153e41fbeabb29b48247f7a11ce76c538f7d1/arro3_core-0.6.5.tar.gz", hash = "sha256:768078887cd7ac82de4736f94bbd91f6d660f10779848bd5b019f511badd9d75", size = 107522, upload-time = "2025-10-13T23:12:38.872Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/8a/24b35cf01a68621f5f07e3191ca96f70a145022ca367347266901eb504a7/arro3_core-0.6.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:da193dc2fb8c2005d0b3887b09d1a90d42cec1f59f17a8a1a5791f0de90946ae", size = 2678116, upload-time = "2025-10-13T23:09:04.198Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/7a/4398bb0582fb22d575f256f2b9ac7be735c765222cc61fb214d606bdb77c/arro3_core-0.6.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed1a760ec39fe19c65e98f45515582408002d0212df5db227a5959ffeb07ad4a", size = 2383214, upload-time = "2025-10-13T23:09:06.841Z" }, - { url = "https://files.pythonhosted.org/packages/82/3f/a321501c5da4bf3ff7438c3e5eb6e63bcecb5630c0f4a89a017cbfa8e4a0/arro3_core-0.6.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6584a3d28007740afcef1e301332876e2b785bd8edd59a458a6bc9b051bce052", size = 2883536, upload-time = "2025-10-13T23:09:08.877Z" }, - { url = "https://files.pythonhosted.org/packages/0d/50/1d1e55b9a8c4cf2fdeb954947aa135010554a3333b709e8cad3d5d084be2/arro3_core-0.6.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e0af4789618f02bead4a0cd4d0a54abd9c8aa4fcedf9872b4891d2e3e984161", size = 2908828, upload-time = "2025-10-13T23:09:10.958Z" }, - { url = "https://files.pythonhosted.org/packages/12/75/b4b1de1ccb17890bada9a3f4131cf3137f145d5d10490db51de6b8799926/arro3_core-0.6.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c73f212e549e9b6d11cfe3f14bbf3fba9d0891426afb5916688d16d0df724085", size = 3145458, upload-time = "2025-10-13T23:09:13.275Z" }, - { url = "https://files.pythonhosted.org/packages/08/4f/f42ce1840490fd0863bfbc56f28eaaec3bcb4eb322079af9c070111657e5/arro3_core-0.6.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f88f62e4e276a9e84f250722d2e5ffc078af9a3f67ac691f572a0e05dd6095", size = 2775793, upload-time = "2025-10-13T23:09:15.342Z" }, - { url = "https://files.pythonhosted.org/packages/2b/aa/9637efc8d8733c34bedef44e5b2c170dea14d15ab56b3566d8d7963c2616/arro3_core-0.6.5-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:b2635e4c227f25ff8784dc8efb38cb7c1674646cfdc68ded53f2426289885f0e", size = 2516697, upload-time = "2025-10-13T23:09:17.584Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/84/1fcfadf956bc25eb5251b1ea7a7099f05198a55764635d2fc9ceafdbdbd1/arro3_core-0.6.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a5f3e936686bcd8542fafc94c68fdb23ec42d1d51a4777967ae815c90aff7296", size = 3023625, upload-time = "2025-10-13T23:09:21.556Z" }, - { url = "https://files.pythonhosted.org/packages/58/d0/52d0cb3c0dfa8e94ba2118b7e91a70da76d6ede9de4e70374f831f38cfdf/arro3_core-0.6.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:705c32fec03dadc08f807d69ce557882005d43eb20ec62699f7036340f0d580f", size = 2701346, upload-time = "2025-10-13T23:09:25.031Z" }, - { url = "https://files.pythonhosted.org/packages/69/bf/42a6f6501805c31cb65d8a6e3379eeec4fa6c26dc07c9ce894f363ccad1c/arro3_core-0.6.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:56d8166235a4c54e4f7ba082ec76890c820fa8c1b6c995ec59cead62a9698e59", size = 3153207, upload-time = "2025-10-13T23:09:28.254Z" }, - { url = "https://files.pythonhosted.org/packages/4f/e5/41fdee468b33759b42958347c2d70b0461bf8f70ba1762a94cdf2e9b0142/arro3_core-0.6.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1ba43ba9081c00767083195222b6be74913de668296f55599658c4b0bb7cd327", size = 3105033, upload-time = "2025-10-13T23:09:31.545Z" }, - { url = "https://files.pythonhosted.org/packages/03/e0/b6d733b4540c05bac546162e045b547031f4d88c67b7c864929d9bce29ad/arro3_core-0.6.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4f5df13c6742e3f0b494cfe9025dccdc8426a74cc9e3e5a1239311e07a4b24e0", size = 2954793, upload-time = "2025-10-13T23:09:34.988Z" }, - { url = "https://files.pythonhosted.org/packages/c0/34/8353ba79c8d0498eaacc077d58b384ef785e0b69c9cbff7c2580136b8fe3/arro3_core-0.6.5-cp310-cp310-win_amd64.whl", hash = "sha256:34676b728178236df63c9ea10b21432392d4b5bb51e2030e77c68eed4dede2ad", size = 2837495, upload-time = "2025-10-13T23:09:38.539Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/85/20e46d3ed59d2f93be4a4d1abea4f6bef3e96acd59bf5a50726f84303c51/arro3_core-0.6.5-cp311-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9d5999506daec1ab31096b3deb1e3573041d6ecadb4ca99c96f7ab26720c592c", size = 2685615, upload-time = "2025-10-13T23:09:41.793Z" }, - { url = "https://files.pythonhosted.org/packages/d0/9c/427d578f7d2bf3149515a8b75217e7189e7b1d74e5c5609e1a7e7f0f8d3c/arro3_core-0.6.5-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:bd3e251184c2dd6ade81c5613256b6d85ab3ddbd5af838b1de657e0ddec017f8", size = 2391944, upload-time = "2025-10-13T23:09:45.266Z" }, - { url = "https://files.pythonhosted.org/packages/90/24/7e4af478eb889bfa401e1c1b8868048ca692e6205affbf81cf3666347852/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7cadb29349960d3821b0515d9df80f2725cea155ad966c699f6084de32e313cb", size = 2888376, upload-time = "2025-10-13T23:09:48.737Z" }, - { url = "https://files.pythonhosted.org/packages/70/3b/01006a96bc980275aa4d2eb759c5f10afb7c85fcdce3c36ddb18635ad23b/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a922e560ed2ccee3293d51b39e013b51cc233895d25ddafcacfb83c540a19e6f", size = 2916568, upload-time = "2025-10-13T23:09:51.95Z" }, - { url = "https://files.pythonhosted.org/packages/a2/2f/4e04c7f5687de6fb6f88aa7590b16bcf507ba17ddbd268525f27b70b7a68/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68fe6672bf51f039b12046a209cba0a9405e10ae44e5a0d557f091b356a62051", size = 3144223, upload-time = "2025-10-13T23:09:55.387Z" }, - { url = "https://files.pythonhosted.org/packages/31/4a/72dc383d1a0d14f1d453e334e3461e229762edb1bf3f75b3ab977e9386ed/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c3ee95603e375401a58ff763ce2c8aa858e0c4f757c1fb719f48fb070f540b2", size = 2781862, upload-time = "2025-10-13T23:09:59.035Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/dc/0df7684b683114eaf8e57989b4230edb359cbfb6e98b8770d69128b27572/arro3_core-0.6.5-cp311-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:fbaf6b65213630007b798b565e0701c2092a330deeba16bd3d896d401f7e9f28", size = 2522442, upload-time = "2025-10-13T23:10:02.134Z" }, - { url = "https://files.pythonhosted.org/packages/c9/04/75f8627cd7fe4d103eca51760d50269cfbc0bf6beaf83a3cdefb4ebd37c7/arro3_core-0.6.5-cp311-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:20679f874558bb2113e96325522625ec64a72687000b7a9578031a4d082c6ef5", size = 3033454, upload-time = "2025-10-13T23:10:05.192Z" }, - { url = "https://files.pythonhosted.org/packages/ea/19/f2d54985da65bf6d3da76218bee56383285035541c8d0cadb53095845b3e/arro3_core-0.6.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d82d6ec32d5c7c73057fb9c528390289fd5bc94b8d8f28fca9c56fc8e41c412c", size = 2705984, upload-time = "2025-10-13T23:10:08.518Z" }, - { url = "https://files.pythonhosted.org/packages/6c/53/b1d7742d6db7b4aa44d3785956955d651b3ac36db321625fd15466be1aca/arro3_core-0.6.5-cp311-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:4cba4db0a4203a3ccf131c3fb7804d77f0740d6165ec9efa3aa3acbca87c43a3", size = 3157472, upload-time = "2025-10-13T23:10:11.976Z" }, - { url = "https://files.pythonhosted.org/packages/05/31/68711327dbdd480aed54158fc1c46ab245e860ab0286e0916ce788f9889e/arro3_core-0.6.5-cp311-abi3-musllinux_1_2_i686.whl", hash = "sha256:e358affc4a0fe5c1b5dccf4f92c43a836aaa4c4eab0906c83b00b60275de3b6d", size = 3117099, upload-time = "2025-10-13T23:10:15.374Z" }, - { url = "https://files.pythonhosted.org/packages/31/e3/15ffca0797d9500b23759ae4477cf052fde8dd47a3890f4e4e1d04639016/arro3_core-0.6.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:324e43f07b7681846d00a8995b78bdc4b4a719047aa0d34426b462b8f208ee98", size = 2963677, upload-time = "2025-10-13T23:10:18.828Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/02/69e60dbe3bbe2bfc8b6dfa4f4bfcb8d1dd240a137bf2a5f7bcc84703f05c/arro3_core-0.6.5-cp311-abi3-win_amd64.whl", hash = "sha256:285f802c8a42fe29ecb84584d1700bc4c4f974552b75f805e1f4362d28b97080", size = 2850445, upload-time = "2025-10-13T23:10:22.345Z" }, - { url = "https://files.pythonhosted.org/packages/b1/29/2e5b091f6b5cffb6489dbe7ed353841568dde8ac4d1232c77321da1d0925/arro3_core-0.6.5-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:8c20e69c3b3411fd6ed56091f388e699072651e880e682be5bd14f3a392ed3e8", size = 2671985, upload-time = "2025-10-13T23:10:25.515Z" }, - { url = "https://files.pythonhosted.org/packages/30/74/764ac4b58fef3fdfc655416c42349206156db5c687fa24a0674acaeaadbb/arro3_core-0.6.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:92211f1d03221ff74d0b535a576b39601083d8e98e9d47228314573f9d4f9ae2", size = 2382931, upload-time = "2025-10-13T23:10:29.893Z" }, - { url = "https://files.pythonhosted.org/packages/6a/07/bd8c92e218240ae8a30150a5d7a2dab359b452ab54a8bb7b90effe806e3d/arro3_core-0.6.5-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:280d933b75f2649779d76e32a07f91d2352a952f2c97ddf7b320e267f440cd42", size = 2879900, upload-time = "2025-10-13T23:10:33.238Z" }, - { url = "https://files.pythonhosted.org/packages/0f/d4/253725019fe2ae5f5fde87928118ffa568cc59f07b2d6a0e90620938c537/arro3_core-0.6.5-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfc3f6b93b924f43fb7985b06202343c30b43da6bd5055ba8b84eda431e494d4", size = 2904149, upload-time = "2025-10-13T23:10:36.547Z" }, - { url = "https://files.pythonhosted.org/packages/f0/b0/7a3dea641ac8de041c1a34859a2f2a82d3cdf3c3360872101c1d198a1e24/arro3_core-0.6.5-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5963635eb698ebc7da689e641f68b3998864bab894cf0ca84bd058b8c60d97f", size = 3143477, upload-time = "2025-10-13T23:10:40.232Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/05/1a50575be33fe9240898a1b5a8574658a905b5675865285585e070dcf7e2/arro3_core-0.6.5-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac291b3e74b57e56e03373d57530540cbbbfd92e4219fe2778ea531006673fe9", size = 2776522, upload-time = "2025-10-13T23:10:43.413Z" }, - { url = "https://files.pythonhosted.org/packages/2e/bd/e7b03207e7906e94e327cd4190fdb2d26ae52bc4ee1edeb057fed760796b/arro3_core-0.6.5-cp313-cp313t-manylinux_2_24_aarch64.whl", hash = "sha256:5d3f4cc58a654037d61f61ba230419da2c8f88a0ac82b9d41fe307f7cf9fda97", size = 2515426, upload-time = "2025-10-13T23:10:46.926Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ed/82d1febd5c104eccdfb82434e3619125c328c36da143e19dfa3c86de4a81/arro3_core-0.6.5-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:93cddac90238d64451f5e66c630ded89d0b5fd6d2c099bf3a5151dde2c1ddf1d", size = 3024759, upload-time = "2025-10-13T23:10:50.281Z" }, - { url = "https://files.pythonhosted.org/packages/da/cd/00e06907e42e404c21eb08282dee94ac7a1961facfa9a96d116829031721/arro3_core-0.6.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1fa7ac10db5846c33f4e8b66a6eaa705d84998e38575a835acac9a6a6649933d", size = 2700191, upload-time = "2025-10-13T23:10:53.776Z" }, - { url = "https://files.pythonhosted.org/packages/a3/11/a4bb9a900f456a6905d481bd2289f7a2371dcde024de56779621fd6a92c3/arro3_core-0.6.5-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:ca69f698a065cdbf845d59d412bc204e8f8af12f93737d82e6a18f3cff812349", size = 3149963, upload-time = "2025-10-13T23:10:57.163Z" }, - { url = "https://files.pythonhosted.org/packages/28/8a/79c76ad88b16f2fac25684f7313593738f353355eb1af2307e43efd7b1ca/arro3_core-0.6.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:de74a2512e2e2366d4b064c498c38672bf6ddea38acec8b1999b4e66182dd001", size = 3104663, upload-time = "2025-10-13T23:11:00.582Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/66/9152feaa87f851a37c1a2bd74fb89d7e82e4c76447ee590bf8e6fff5e9d8/arro3_core-0.6.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:806ca8e20507675b2de68b3d009f76e898cc3c3e441c834ea5220866f68aac50", size = 2956440, upload-time = "2025-10-13T23:11:03.769Z" }, - { url = "https://files.pythonhosted.org/packages/ad/66/f4179ef64d5c18fe76ec93cfbff42c0f401438ef771c6766b880044d7e13/arro3_core-0.6.5-cp313-cp313t-win_amd64.whl", hash = "sha256:8f6f0cc78877ade7ad6e678a4671b191406547e7b407bc9637436869c017ed47", size = 2845345, upload-time = "2025-10-13T23:11:07.447Z" }, - { url = "https://files.pythonhosted.org/packages/10/ca/b2139dbb25f9fefb9b1cdce8a73785615de6763af6a16bf6ff96a3b630f2/arro3_core-0.6.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:26d5b50139f1a96727fa1760b4d70393acf5ee0fba45346ad2d4f69824d3bdc2", size = 2676788, upload-time = "2025-10-13T23:11:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/34/a1/c68dde2944f493c8ccfcb91bf6da6d27a27c3674316dd09c9560f9e6ab1a/arro3_core-0.6.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b65b3d8d7f65f2f3c36002dc467380d7a31ea771132986dddc6341c5a9dc726f", size = 2382809, upload-time = "2025-10-13T23:12:00.175Z" }, - { url = "https://files.pythonhosted.org/packages/c6/fc/2fb81d42a3cecd632deace97dc23ac74083d60d158106440c783bae4ff01/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c3442a79a757ed3fbd7793de180019ae3201f04237537c2e2e3f1e3dd99b31c", size = 2882818, upload-time = "2025-10-13T23:12:03.721Z" }, - { url = "https://files.pythonhosted.org/packages/58/7f/16f741e1d49ba5c5a893ce6f8eb0283d64bc68d6cc9e07ac62f96eaadfae/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:def7b0065a684d6f903a658d2567da47e2fcecde716e0b34eff4d899c6468c8d", size = 2907503, upload-time = "2025-10-13T23:12:07.066Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/45/2eb7972e0bbec0ee0ab22b0f166ec1ea74b53bd76c93a18ced434713e495/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbfe2f2d4d0d393833cd6a4bd9c15266a02307a3028f159155a1c536469c3ae7", size = 3143706, upload-time = "2025-10-13T23:12:10.492Z" }, - { url = "https://files.pythonhosted.org/packages/2d/af/b78e28842faa675e4e6c4d82e861accf21ac08bbab80a65fa80c578f80a1/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a191a3e4f72c34f7ace7724a94f2d90b06c804a6cbece4ae0f18d36325479cf3", size = 2775462, upload-time = "2025-10-13T23:12:14.026Z" }, - { url = "https://files.pythonhosted.org/packages/45/df/950e57e4915e0457acadaaca13c4423d5e2652e403135eb7606d5e6e5443/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_24_aarch64.whl", hash = "sha256:e3f6ab4c6ea96c451eff72aa6c5b9835a0ea8a9847cfe3995c88cce0c7701fb5", size = 2516212, upload-time = "2025-10-13T23:12:17.548Z" }, - { url = "https://files.pythonhosted.org/packages/07/73/821640d0827a829ed2565c2d4812080ab7fb86f0d271b462f9b37e6d946e/arro3_core-0.6.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:27df5239835330299636a02977f2cb34d5c460cc03b2ae1d6ab6a03d28051b08", size = 3023342, upload-time = "2025-10-13T23:12:21.308Z" }, - { url = "https://files.pythonhosted.org/packages/fd/30/51302d2f4d1b627dd11e2be979f2c48550b782d8d58d0378316342e284a8/arro3_core-0.6.5-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:71dce89c0e91be4cfb42591f03809235bbc374c396e08acdf93c4d85b09e40f5", size = 2700740, upload-time = "2025-10-13T23:12:24.968Z" }, - { url = "https://files.pythonhosted.org/packages/1d/e8/0c8a345a013bb64abea60b4864bacc01e43b8699b8874794baec9c8a7e76/arro3_core-0.6.5-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:d380c28f85568ed99c1686fb9d64b5a811d76d569f367cbec8ef7e58f6e2fdf9", size = 3152749, upload-time = "2025-10-13T23:12:28.393Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/42/003b30c4da394366d5967a5b993f7471a74182c983d8f757891b3dd5d594/arro3_core-0.6.5-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:8e359c0c4fe9992f5a863a4a31502ea58eb2f92988fc2e501850540b3eff0328", size = 3104676, upload-time = "2025-10-13T23:12:31.711Z" }, - { url = "https://files.pythonhosted.org/packages/0b/fd/4f8dac58ea17e05978bf35cb9a3e485b1ff3cdd6e2cc29deb08f54080de4/arro3_core-0.6.5-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a58acbc61480b533aa84d735db04b1e68fc7f6807ab694d606c03b5e694d83d", size = 2954405, upload-time = "2025-10-13T23:12:35.328Z" }, -] - -[[package]] -name = "astroid" -version = "3.3.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196, upload-time = "2024-12-24T01:13:05.59Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/28/0bc8a17d6cd4cc3c79ae41b7105a2b9a327c110e5ddd37a8a27b29a5c8a2/astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c", size = 275153, upload-time = "2024-12-24T01:13:02.726Z" }, -] - -[[package]] -name = "asttokens" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978, upload-time = "2024-11-30T04:30:14.439Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = 
"sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, -] - -[[package]] -name = "babel" -version = "2.16.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104, upload-time = "2024-08-08T14:25:45.459Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599, upload-time = "2024-08-08T14:25:42.686Z" }, -] - -[[package]] -name = "beautifulsoup4" -version = "4.12.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "soupsieve" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181, upload-time = "2024-01-17T16:53:17.902Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925, upload-time = "2024-01-17T16:53:12.779Z" }, -] - -[[package]] -name = "certifi" -version = "2024.12.14" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/bd/1d41ee578ce09523c81a15426705dd20969f5abf006d1afe8aeff0dd776a/certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db", size = 166010, upload-time = "2024-12-14T13:52:38.02Z" 
} -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927, upload-time = "2024-12-14T13:52:36.114Z" }, -] - -[[package]] -name = "cffi" -version = "1.17.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pycparser" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191, upload-time = "2024-09-04T20:43:30.027Z" }, - { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592, upload-time = "2024-09-04T20:43:32.108Z" }, - { url = "https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024, upload-time = "2024-09-04T20:43:34.186Z" }, - { url = "https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188, upload-time = "2024-09-04T20:43:36.286Z" }, - { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571, upload-time = "2024-09-04T20:43:38.586Z" }, - { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687, upload-time = "2024-09-04T20:43:40.084Z" }, - { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211, upload-time = "2024-09-04T20:43:41.526Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325, upload-time = "2024-09-04T20:43:43.117Z" }, - { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size = 438784, upload-time = "2024-09-04T20:43:45.256Z" }, - { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", 
size = 461564, upload-time = "2024-09-04T20:43:46.779Z" }, - { url = "https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804, upload-time = "2024-09-04T20:43:48.186Z" }, - { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299, upload-time = "2024-09-04T20:43:49.812Z" }, - { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264, upload-time = "2024-09-04T20:43:51.124Z" }, - { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651, upload-time = "2024-09-04T20:43:52.872Z" }, - { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259, upload-time = "2024-09-04T20:43:56.123Z" }, - { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200, upload-time = "2024-09-04T20:43:57.891Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235, upload-time = "2024-09-04T20:44:00.18Z" }, - { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721, upload-time = "2024-09-04T20:44:01.585Z" }, - { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242, upload-time = "2024-09-04T20:44:03.467Z" }, - { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999, upload-time = "2024-09-04T20:44:05.023Z" }, - { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242, upload-time = "2024-09-04T20:44:06.444Z" }, - { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604, upload-time = "2024-09-04T20:44:08.206Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727, upload-time = "2024-09-04T20:44:09.481Z" }, - { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400, upload-time = "2024-09-04T20:44:10.873Z" }, - { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload-time = "2024-09-04T20:44:12.232Z" }, - { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload-time = "2024-09-04T20:44:13.739Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload-time = "2024-09-04T20:44:15.231Z" }, - { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload-time = "2024-09-04T20:44:17.188Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload-time = "2024-09-04T20:44:18.688Z" }, - { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload-time = "2024-09-04T20:44:20.248Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload-time = "2024-09-04T20:44:21.673Z" }, - { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload-time = "2024-09-04T20:44:23.245Z" }, - { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload-time = "2024-09-04T20:44:24.757Z" }, - { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989, upload-time = "2024-09-04T20:44:28.956Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802, upload-time = "2024-09-04T20:44:30.289Z" }, - { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792, upload-time = "2024-09-04T20:44:32.01Z" }, - { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893, upload-time = "2024-09-04T20:44:33.606Z" }, - { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810, upload-time = "2024-09-04T20:44:35.191Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200, upload-time = "2024-09-04T20:44:36.743Z" }, - { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447, upload-time = "2024-09-04T20:44:38.492Z" }, - { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358, upload-time = "2024-09-04T20:44:40.046Z" }, - { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469, upload-time = "2024-09-04T20:44:41.616Z" }, - { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload-time = "2024-09-04T20:44:43.733Z" }, - { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, -] - -[[package]] -name = "cfgv" -version = "3.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, -] - -[[package]] -name = "charset-normalizer" -version = "3.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188, upload-time = "2024-12-24T18:12:35.43Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/58/5580c1716040bc89206c77d8f74418caf82ce519aae06450393ca73475d1/charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", size = 198013, upload-time = "2024-12-24T18:09:43.671Z" }, - { url = "https://files.pythonhosted.org/packages/d0/11/00341177ae71c6f5159a08168bcb98c6e6d196d372c94511f9f6c9afe0c6/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", size = 141285, upload-time = "2024-12-24T18:09:48.113Z" }, - { url = "https://files.pythonhosted.org/packages/01/09/11d684ea5819e5a8f5100fb0b38cf8d02b514746607934134d31233e02c8/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", size = 151449, upload-time = 
"2024-12-24T18:09:50.845Z" }, - { url = "https://files.pythonhosted.org/packages/08/06/9f5a12939db324d905dc1f70591ae7d7898d030d7662f0d426e2286f68c9/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", size = 143892, upload-time = "2024-12-24T18:09:52.078Z" }, - { url = "https://files.pythonhosted.org/packages/93/62/5e89cdfe04584cb7f4d36003ffa2936681b03ecc0754f8e969c2becb7e24/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", size = 146123, upload-time = "2024-12-24T18:09:54.575Z" }, - { url = "https://files.pythonhosted.org/packages/a9/ac/ab729a15c516da2ab70a05f8722ecfccc3f04ed7a18e45c75bbbaa347d61/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", size = 147943, upload-time = "2024-12-24T18:09:57.324Z" }, - { url = "https://files.pythonhosted.org/packages/03/d2/3f392f23f042615689456e9a274640c1d2e5dd1d52de36ab8f7955f8f050/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", size = 142063, upload-time = "2024-12-24T18:09:59.794Z" }, - { url = "https://files.pythonhosted.org/packages/f2/e3/e20aae5e1039a2cd9b08d9205f52142329f887f8cf70da3650326670bddf/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", size = 150578, upload-time = "2024-12-24T18:10:02.357Z" }, - { url = "https://files.pythonhosted.org/packages/8d/af/779ad72a4da0aed925e1139d458adc486e61076d7ecdcc09e610ea8678db/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", size = 153629, upload-time = "2024-12-24T18:10:03.678Z" }, - { url = "https://files.pythonhosted.org/packages/c2/b6/7aa450b278e7aa92cf7732140bfd8be21f5f29d5bf334ae987c945276639/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", size = 150778, upload-time = "2024-12-24T18:10:06.197Z" }, - { url = "https://files.pythonhosted.org/packages/39/f4/d9f4f712d0951dcbfd42920d3db81b00dd23b6ab520419626f4023334056/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", size = 146453, upload-time = "2024-12-24T18:10:08.848Z" }, - { url = "https://files.pythonhosted.org/packages/49/2b/999d0314e4ee0cff3cb83e6bc9aeddd397eeed693edb4facb901eb8fbb69/charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", size = 95479, upload-time = "2024-12-24T18:10:10.044Z" }, - { url = "https://files.pythonhosted.org/packages/2d/ce/3cbed41cff67e455a386fb5e5dd8906cdda2ed92fbc6297921f2e4419309/charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", size = 102790, upload-time = "2024-12-24T18:10:11.323Z" }, - { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995, upload-time = "2024-12-24T18:10:12.838Z" }, - { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471, upload-time = "2024-12-24T18:10:14.101Z" }, - { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831, upload-time = "2024-12-24T18:10:15.512Z" }, - { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335, upload-time = "2024-12-24T18:10:18.369Z" }, - { url = "https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862, upload-time = "2024-12-24T18:10:19.743Z" }, - { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673, upload-time = "2024-12-24T18:10:21.139Z" }, - { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211, upload-time = "2024-12-24T18:10:22.382Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039, upload-time = "2024-12-24T18:10:24.802Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939, upload-time = "2024-12-24T18:10:26.124Z" }, - { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075, upload-time = "2024-12-24T18:10:30.027Z" }, - { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340, upload-time = "2024-12-24T18:10:32.679Z" }, - { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205, upload-time = "2024-12-24T18:10:34.724Z" }, - { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441, upload-time = "2024-12-24T18:10:37.574Z" }, - { url = 
"https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105, upload-time = "2024-12-24T18:10:38.83Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404, upload-time = "2024-12-24T18:10:44.272Z" }, - { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423, upload-time = "2024-12-24T18:10:45.492Z" }, - { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184, upload-time = "2024-12-24T18:10:47.898Z" }, - { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268, upload-time = "2024-12-24T18:10:50.589Z" }, - { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601, upload-time = "2024-12-24T18:10:52.541Z" }, - { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098, upload-time = "2024-12-24T18:10:53.789Z" }, - { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520, upload-time = "2024-12-24T18:10:55.048Z" }, - { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852, upload-time = "2024-12-24T18:10:57.647Z" }, - { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488, upload-time = "2024-12-24T18:10:59.43Z" }, - { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192, upload-time = "2024-12-24T18:11:00.676Z" }, - { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", 
size = 95550, upload-time = "2024-12-24T18:11:01.952Z" }, - { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785, upload-time = "2024-12-24T18:11:03.142Z" }, - { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698, upload-time = "2024-12-24T18:11:05.834Z" }, - { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162, upload-time = "2024-12-24T18:11:07.064Z" }, - { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263, upload-time = "2024-12-24T18:11:08.374Z" }, - { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966, upload-time = "2024-12-24T18:11:09.831Z" }, - { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992, upload-time = "2024-12-24T18:11:12.03Z" }, - { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162, upload-time = "2024-12-24T18:11:13.372Z" }, - { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972, upload-time = "2024-12-24T18:11:14.628Z" }, - { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095, upload-time = "2024-12-24T18:11:17.672Z" }, - { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668, upload-time = "2024-12-24T18:11:18.989Z" }, - { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073, upload-time = "2024-12-24T18:11:21.507Z" }, - { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732, upload-time = "2024-12-24T18:11:22.774Z" }, - { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391, upload-time = "2024-12-24T18:11:24.139Z" }, - { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702, upload-time = "2024-12-24T18:11:26.535Z" }, - { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767, upload-time = "2024-12-24T18:12:32.852Z" }, -] - -[[package]] -name = "codespell" -version = "2.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/15/e0/709453393c0ea77d007d907dd436b3ee262e28b30995ea1aa36c6ffbccaf/codespell-2.4.1.tar.gz", hash = "sha256:299fcdcb09d23e81e35a671bbe746d5ad7e8385972e65dbb833a2eaac33c01e5", size = 344740, upload-time = "2025-01-28T18:52:39.411Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/01/b394922252051e97aab231d416c86da3d8a6d781eeadcdca1082867de64e/codespell-2.4.1-py3-none-any.whl", hash = "sha256:3dadafa67df7e4a3dbf51e0d7315061b80d265f9552ebd699b3dd6834b47e425", size = 344501, upload-time = "2025-01-28T18:52:37.057Z" }, -] - -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, -] - -[[package]] -name = "cryptography" -version = "44.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/4c/45dfa6829acffa344e3967d6006ee4ae8be57af746ae2eba1c431949b32c/cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02", size = 710657, upload-time = "2024-11-27T18:07:10.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/09/8cc67f9b84730ad330b3b72cf867150744bf07ff113cda21a15a1c6d2c7c/cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123", size = 6541833, upload-time = "2024-11-27T18:05:55.475Z" }, - { url = "https://files.pythonhosted.org/packages/7e/5b/3759e30a103144e29632e7cb72aec28cedc79e514b2ea8896bb17163c19b/cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092", size = 3922710, upload-time = "2024-11-27T18:05:58.621Z" }, - { url = "https://files.pythonhosted.org/packages/5f/58/3b14bf39f1a0cfd679e753e8647ada56cddbf5acebffe7db90e184c76168/cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f", size = 4137546, upload-time = "2024-11-27T18:06:01.062Z" }, - { url = "https://files.pythonhosted.org/packages/98/65/13d9e76ca19b0ba5603d71ac8424b5694415b348e719db277b5edc985ff5/cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb", size = 3915420, upload-time = "2024-11-27T18:06:03.487Z" }, - { url = "https://files.pythonhosted.org/packages/b1/07/40fe09ce96b91fc9276a9ad272832ead0fddedcba87f1190372af8e3039c/cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b", size = 4154498, upload-time = "2024-11-27T18:06:05.763Z" }, - { url = "https://files.pythonhosted.org/packages/75/ea/af65619c800ec0a7e4034207aec543acdf248d9bffba0533342d1bd435e1/cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543", size = 3932569, upload-time = "2024-11-27T18:06:07.489Z" }, - { url = "https://files.pythonhosted.org/packages/c7/af/d1deb0c04d59612e3d5e54203159e284d3e7a6921e565bb0eeb6269bdd8a/cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e", size = 4016721, upload-time = "2024-11-27T18:06:11.57Z" }, - { url = "https://files.pythonhosted.org/packages/bd/69/7ca326c55698d0688db867795134bdfac87136b80ef373aaa42b225d6dd5/cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e", size = 4240915, upload-time = "2024-11-27T18:06:13.515Z" }, - { url = "https://files.pythonhosted.org/packages/ef/d4/cae11bf68c0f981e0413906c6dd03ae7fa864347ed5fac40021df1ef467c/cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053", size = 2757925, upload-time = 
"2024-11-27T18:06:16.019Z" }, - { url = "https://files.pythonhosted.org/packages/64/b1/50d7739254d2002acae64eed4fc43b24ac0cc44bf0a0d388d1ca06ec5bb1/cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd", size = 3202055, upload-time = "2024-11-27T18:06:19.113Z" }, - { url = "https://files.pythonhosted.org/packages/11/18/61e52a3d28fc1514a43b0ac291177acd1b4de00e9301aaf7ef867076ff8a/cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591", size = 6542801, upload-time = "2024-11-27T18:06:21.431Z" }, - { url = "https://files.pythonhosted.org/packages/1a/07/5f165b6c65696ef75601b781a280fc3b33f1e0cd6aa5a92d9fb96c410e97/cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7", size = 3922613, upload-time = "2024-11-27T18:06:24.314Z" }, - { url = "https://files.pythonhosted.org/packages/28/34/6b3ac1d80fc174812486561cf25194338151780f27e438526f9c64e16869/cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc", size = 4137925, upload-time = "2024-11-27T18:06:27.079Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c7/c656eb08fd22255d21bc3129625ed9cd5ee305f33752ef2278711b3fa98b/cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289", size = 3915417, upload-time = "2024-11-27T18:06:28.959Z" }, - { url = "https://files.pythonhosted.org/packages/ef/82/72403624f197af0db6bac4e58153bc9ac0e6020e57234115db9596eee85d/cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7", size = 4155160, upload-time = "2024-11-27T18:06:30.866Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/cd/2f3c440913d4329ade49b146d74f2e9766422e1732613f57097fea61f344/cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c", size = 3932331, upload-time = "2024-11-27T18:06:33.432Z" }, - { url = "https://files.pythonhosted.org/packages/7f/df/8be88797f0a1cca6e255189a57bb49237402b1880d6e8721690c5603ac23/cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64", size = 4017372, upload-time = "2024-11-27T18:06:38.343Z" }, - { url = "https://files.pythonhosted.org/packages/af/36/5ccc376f025a834e72b8e52e18746b927f34e4520487098e283a719c205e/cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285", size = 4239657, upload-time = "2024-11-27T18:06:41.045Z" }, - { url = "https://files.pythonhosted.org/packages/46/b0/f4f7d0d0bcfbc8dd6296c1449be326d04217c57afb8b2594f017eed95533/cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417", size = 2758672, upload-time = "2024-11-27T18:06:43.566Z" }, - { url = "https://files.pythonhosted.org/packages/97/9b/443270b9210f13f6ef240eff73fd32e02d381e7103969dc66ce8e89ee901/cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede", size = 3202071, upload-time = "2024-11-27T18:06:45.586Z" }, - { url = "https://files.pythonhosted.org/packages/77/d4/fea74422326388bbac0c37b7489a0fcb1681a698c3b875959430ba550daa/cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731", size = 3338857, upload-time = "2024-11-27T18:06:48.88Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/aa/ba8a7467c206cb7b62f09b4168da541b5109838627f582843bbbe0235e8e/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4", size = 3850615, upload-time = "2024-11-27T18:06:50.774Z" }, - { url = "https://files.pythonhosted.org/packages/89/fa/b160e10a64cc395d090105be14f399b94e617c879efd401188ce0fea39ee/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756", size = 4081622, upload-time = "2024-11-27T18:06:55.126Z" }, - { url = "https://files.pythonhosted.org/packages/47/8f/20ff0656bb0cf7af26ec1d01f780c5cfbaa7666736063378c5f48558b515/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c", size = 3867546, upload-time = "2024-11-27T18:06:57.694Z" }, - { url = "https://files.pythonhosted.org/packages/38/d9/28edf32ee2fcdca587146bcde90102a7319b2f2c690edfa627e46d586050/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa", size = 4090937, upload-time = "2024-11-27T18:07:00.338Z" }, - { url = "https://files.pythonhosted.org/packages/cc/9d/37e5da7519de7b0b070a3fedd4230fe76d50d2a21403e0f2153d70ac4163/cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c", size = 3128774, upload-time = "2024-11-27T18:07:02.157Z" }, -] - -[[package]] -name = "datafusion" -source = { editable = "." 
} -dependencies = [ - { name = "pyarrow" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] - -[package.dev-dependencies] -dev = [ - { name = "arro3-core" }, - { name = "codespell" }, - { name = "maturin" }, - { name = "nanoarrow" }, - { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "pre-commit" }, - { name = "pyarrow" }, - { name = "pygithub" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pyyaml" }, - { name = "ruff" }, - { name = "toml" }, -] -docs = [ - { name = "ipython" }, - { name = "jinja2" }, - { name = "myst-parser" }, - { name = "pandas" }, - { name = "pickleshare" }, - { name = "pydata-sphinx-theme" }, - { name = "setuptools" }, - { name = "sphinx" }, - { name = "sphinx-autoapi" }, -] - -[package.metadata] -requires-dist = [ - { name = "pyarrow", marker = "python_full_version < '3.14'", specifier = ">=16.0.0" }, - { name = "pyarrow", marker = "python_full_version >= '3.14'", specifier = ">=22.0.0" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "arro3-core", specifier = "==0.6.5" }, - { name = "codespell", specifier = "==2.4.1" }, - { name = "maturin", specifier = ">=1.8.1" }, - { name = "nanoarrow", specifier = "==0.8.0" }, - { name = "numpy", marker = "python_full_version < '3.14'", specifier = ">1.25.0" }, - { name = "numpy", marker = "python_full_version >= '3.14'", specifier = ">=2.3.2" }, - { name = "pre-commit", specifier = ">=4.3.0" }, - { name = "pyarrow", specifier = ">=19.0.0" }, - { name = "pygithub", specifier = "==2.5.0" }, - { name = "pytest", specifier = ">=7.4.4" }, - { name = "pytest-asyncio", specifier = ">=0.23.3" }, - { name = "pyyaml", specifier = ">=6.0.3" }, - { name = 
"ruff", specifier = ">=0.9.1" }, - { name = "toml", specifier = ">=0.10.2" }, -] -docs = [ - { name = "ipython", specifier = ">=8.12.3" }, - { name = "jinja2", specifier = ">=3.1.5" }, - { name = "myst-parser", specifier = ">=3.0.1" }, - { name = "pandas", specifier = ">=2.0.3" }, - { name = "pickleshare", specifier = ">=0.7.5" }, - { name = "pydata-sphinx-theme", specifier = "==0.8.0" }, - { name = "setuptools", specifier = ">=75.3.0" }, - { name = "sphinx", specifier = ">=7.1.2" }, - { name = "sphinx-autoapi", specifier = ">=3.4.0" }, -] - -[[package]] -name = "decorator" -version = "5.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/66/0c/8d907af351aa16b42caae42f9d6aa37b900c67308052d10fdce809f8d952/decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", size = 35016, upload-time = "2022-01-07T08:20:05.666Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073, upload-time = "2022-01-07T08:20:03.734Z" }, -] - -[[package]] -name = "deprecated" -version = "1.2.18" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, 
-] - -[[package]] -name = "distlib" -version = "0.3.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, -] - -[[package]] -name = "docutils" -version = "0.21.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, -] - -[[package]] -name = "exceptiongroup" -version = "1.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883, upload-time = "2024-07-12T22:26:00.161Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = 
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453, upload-time = "2024-07-12T22:25:58.476Z" }, -] - -[[package]] -name = "executing" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485, upload-time = "2024-09-01T12:37:35.708Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805, upload-time = "2024-09-01T12:37:33.007Z" }, -] - -[[package]] -name = "filelock" -version = "3.18.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, -] - -[[package]] -name = "identify" -version = "2.6.12" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, -] - -[[package]] -name = "idna" -version = "3.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, -] - -[[package]] -name = "imagesize" -version = "1.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026, upload-time = "2022-07-01T12:21:05.687Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, -] - -[[package]] -name = "iniconfig" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = 
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646, upload-time = "2023-01-07T11:08:11.254Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892, upload-time = "2023-01-07T11:08:09.864Z" }, -] - -[[package]] -name = "ipython" -version = "8.31.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "decorator" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, - { name = "jedi" }, - { name = "matplotlib-inline" }, - { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit" }, - { name = "pygments" }, - { name = "stack-data" }, - { name = "traitlets" }, - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/01/35/6f90fdddff7a08b7b715fccbd2427b5212c9525cd043d26fdc45bee0708d/ipython-8.31.0.tar.gz", hash = "sha256:b6a2274606bec6166405ff05e54932ed6e5cfecaca1fc05f2cacde7bb074d70b", size = 5501011, upload-time = "2024-12-20T12:34:22.61Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/60/d0feb6b6d9fe4ab89fe8fe5b47cbf6cd936bfd9f1e7ffa9d0015425aeed6/ipython-8.31.0-py3-none-any.whl", hash = "sha256:46ec58f8d3d076a61d128fe517a51eb730e3aaf0c184ea8c17d16e366660c6a6", size = 821583, upload-time = "2024-12-20T12:34:17.106Z" }, -] - -[[package]] -name = "jedi" -version = "0.19.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "parso" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = 
"sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, -] - -[[package]] -name = "jinja2" -version = "3.1.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674, upload-time = "2024-12-21T18:30:22.828Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/0f/2ba5fbcd631e3e88689309dbe978c5769e883e4b84ebfe7da30b43275c5a/jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb", size = 134596, upload-time = "2024-12-21T18:30:19.133Z" }, -] - -[[package]] -name = "markdown-it-py" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, -] - -[[package]] -name = "markupsafe" -version = 
"3.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357, upload-time = "2024-10-18T15:20:51.44Z" }, - { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393, upload-time = "2024-10-18T15:20:52.426Z" }, - { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732, upload-time = "2024-10-18T15:20:53.578Z" }, - { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866, upload-time = "2024-10-18T15:20:55.06Z" }, - { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964, upload-time = 
"2024-10-18T15:20:55.906Z" }, - { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977, upload-time = "2024-10-18T15:20:57.189Z" }, - { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366, upload-time = "2024-10-18T15:20:58.235Z" }, - { url = "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091, upload-time = "2024-10-18T15:20:59.235Z" }, - { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065, upload-time = "2024-10-18T15:21:00.307Z" }, - { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514, upload-time = "2024-10-18T15:21:01.122Z" }, - { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload-time = "2024-10-18T15:21:02.187Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload-time = "2024-10-18T15:21:02.941Z" }, - { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload-time = "2024-10-18T15:21:03.953Z" }, - { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload-time = "2024-10-18T15:21:06.495Z" }, - { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload-time = "2024-10-18T15:21:07.295Z" }, - { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload-time = "2024-10-18T15:21:08.073Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload-time = "2024-10-18T15:21:09.318Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" }, - { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" }, - { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" }, - { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" }, - { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = "2024-10-18T15:21:15.642Z" }, - { url = 
"https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" }, - { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" }, - { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" }, - { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" }, - { url = 
"https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, - { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" }, - { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" }, - { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" }, - { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" }, - { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" }, - { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" }, - { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" }, - { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" }, - { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" }, - { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = "2024-10-18T15:21:37.073Z" }, - { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" }, - { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" }, - { url = 
"https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" }, - { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" }, - { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" }, -] - -[[package]] -name = "matplotlib-inline" -version = "0.1.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159, upload-time = "2024-04-15T13:44:44.803Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" }, -] - -[[package]] -name = "maturin" -version = "1.8.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "tomli", marker = "python_full_version < '3.11'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/9a/08/ccb0f917722a35ab0d758be9bb5edaf645c3a3d6170061f10d396ecd273f/maturin-1.8.1.tar.gz", hash = "sha256:49cd964aabf59f8b0a6969f9860d2cdf194ac331529caae14c884f5659568857", size = 197397, upload-time = "2024-12-30T14:03:48.109Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/00/f34077315f34db8ad2ccf6bfe11b864ca27baab3a1320634da8e3cf89a48/maturin-1.8.1-py3-none-linux_armv6l.whl", hash = "sha256:7e590a23d9076b8a994f2e67bc63dc9a2d1c9a41b1e7b45ac354ba8275254e89", size = 7568415, upload-time = "2024-12-30T14:03:07.939Z" }, - { url = "https://files.pythonhosted.org/packages/5c/07/9219976135ce0cb32d2fa6ea5c6d0ad709013d9a17967312e149b98153a6/maturin-1.8.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d8251a95682c83ea60988c804b620c181911cd824aa107b4a49ac5333c92968", size = 14527816, upload-time = "2024-12-30T14:03:13.851Z" }, - { url = "https://files.pythonhosted.org/packages/e6/04/fa009a00903acdd1785d58322193140bfe358595347c39f315112dabdf9e/maturin-1.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b9fc1a4354cac5e32c190410208039812ea88c4a36bd2b6499268ec49ef5de00", size = 7580446, upload-time = "2024-12-30T14:03:17.64Z" }, - { url = "https://files.pythonhosted.org/packages/9b/d4/414b2aab9bbfe88182b734d3aa1b4fef7d7701e50f6be48500378b8c8721/maturin-1.8.1-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:621e171c6b39f95f1d0df69a118416034fbd59c0f89dcaea8c2ea62019deecba", size = 7650535, upload-time = "2024-12-30T14:03:21.115Z" }, - { url = "https://files.pythonhosted.org/packages/f0/64/879418a8a0196013ec1fb19eada0781c04a30e8d6d9227e80f91275a4f5b/maturin-1.8.1-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:98f638739a5132962347871b85c91f525c9246ef4d99796ae98a2031e3df029f", size = 8006702, upload-time = "2024-12-30T14:03:24.318Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/c2/605829324f8371294f70303aca130682df75318958efed246873d3d604ab/maturin-1.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:f9f5c47521924b6e515cbc652a042fe5f17f8747445be9d931048e5d8ddb50a4", size = 7368164, upload-time = "2024-12-30T14:03:26.582Z" }, - { url = "https://files.pythonhosted.org/packages/be/6c/30e136d397bb146b94b628c0ef7f17708281611b97849e2cf37847025ac7/maturin-1.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:0f4407c7353c31bfbb8cdeb82bc2170e474cbfb97b5ba27568f440c9d6c1fdd4", size = 7450889, upload-time = "2024-12-30T14:03:28.893Z" }, - { url = "https://files.pythonhosted.org/packages/1b/50/e1f5023512696d4e56096f702e2f68d6d9a30afe0a4eec82b0e27b8eb4e4/maturin-1.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:ec49cd70cad3c389946c6e2bc0bd50772a7fcb463040dd800720345897eec9bf", size = 9585819, upload-time = "2024-12-30T14:03:31.125Z" }, - { url = "https://files.pythonhosted.org/packages/b7/80/b24b5248d89d2e5982553900237a337ea098ca9297b8369ca2aa95549e0f/maturin-1.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08767d794de8f8a11c5c8b1b47a4ff9fb6ae2d2d97679e27030f2f509c8c2a0", size = 10920801, upload-time = "2024-12-30T14:03:35.127Z" }, - { url = "https://files.pythonhosted.org/packages/6e/f4/8ede7a662fabf93456b44390a5ad22630e25fb5ddaecf787251071b2e143/maturin-1.8.1-py3-none-win32.whl", hash = "sha256:d678407713f3e10df33c5b3d7a343ec0551eb7f14d8ad9ba6febeb96f4e4c75c", size = 6873556, upload-time = "2024-12-30T14:03:37.913Z" }, - { url = "https://files.pythonhosted.org/packages/9c/22/757f093ed0e319e9648155b8c9d716765442bea5bc98ebc58ad4ad5b0524/maturin-1.8.1-py3-none-win_amd64.whl", hash = "sha256:a526f90fe0e5cb59ffb81f4ff547ddc42e823bbdeae4a31012c0893ca6dcaf46", size = 7823153, upload-time = "2024-12-30T14:03:40.33Z" }, - { url = 
"https://files.pythonhosted.org/packages/a4/f5/051413e04f6da25069db5e76759ecdb8cd2a8ab4a94045b5a3bf548c66fa/maturin-1.8.1-py3-none-win_arm64.whl", hash = "sha256:e95f077fd2ddd2f048182880eed458c308571a534be3eb2add4d3dac55bf57f4", size = 6552131, upload-time = "2024-12-30T14:03:45.203Z" }, -] - -[[package]] -name = "mdit-py-plugins" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542, upload-time = "2024-09-09T20:27:49.564Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316, upload-time = "2024-09-09T20:27:48.397Z" }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, -] - -[[package]] -name = "myst-parser" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "docutils" }, - { name = "jinja2" }, - { name = "markdown-it-py" }, - { name = "mdit-py-plugins" }, - { name = 
"pyyaml" }, - { name = "sphinx" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/55/6d1741a1780e5e65038b74bce6689da15f620261c490c3511eb4c12bac4b/myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531", size = 93858, upload-time = "2024-08-05T14:02:45.798Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563, upload-time = "2024-08-05T14:02:43.767Z" }, -] - -[[package]] -name = "nanoarrow" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/70/29/7b1ab53ed83fb70c80571a2487070113881b54067bda72cd87affc360ccc/nanoarrow-0.8.0.tar.gz", hash = "sha256:aa63e01e799380ec4f8adab88f4faac8d27bfb725fe1009fe73d7ce4efd9f7f6", size = 3508214, upload-time = "2026-02-10T03:36:02.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/27/7aece654f60453026fe36985291853243485ac41dfb9a69e421cdd2271fe/nanoarrow-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c5ea89651e49afa2674557005938963cb849d3c65f2f22ac6701c281a7e0244d", size = 834242, upload-time = "2026-02-10T03:33:40.25Z" }, - { url = "https://files.pythonhosted.org/packages/8b/63/2960ea0b1bfeec0381f01e6f7652c104683444b7c9902f42907c911630e9/nanoarrow-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7861371960d09adb377d05da73190103178410dc014369779734f2dbff0ac0ad", size = 741604, upload-time = "2026-02-10T03:33:29.29Z" }, - { url = "https://files.pythonhosted.org/packages/f0/6d/9de1da912da0356169836af8ccecac1664ee4d603b65b7067a27b85ebaf2/nanoarrow-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db62ea708c873980eeb0e711fa6162120d1e372b2404bb79ead69f9aa0560192", size = 970784, upload-time = 
"2026-02-10T03:32:30.293Z" }, - { url = "https://files.pythonhosted.org/packages/a7/a9/5e62e7f1b9b41ff86d6025c57636246e1e8702b0cba322fab0272c3cc0f8/nanoarrow-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:178cc6d097b988d13354c6a48a873b4496c7bcedce43c55c6770186b6d1b4845", size = 1018693, upload-time = "2026-02-10T03:32:51.819Z" }, - { url = "https://files.pythonhosted.org/packages/68/4d/70eb2a672ca81d4385069eb6fc70fa6ab44a029d18df4da48e6691e6d8ba/nanoarrow-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5715e68cc17ccec23e1fcb9e828281bdf6afa11d78c8b0cd9a343c1ac48fb1d", size = 1145087, upload-time = "2026-02-10T03:32:52.801Z" }, - { url = "https://files.pythonhosted.org/packages/83/0e/02698dc0a4af10670822b949cdf0999134152347138d553d440b8f14f471/nanoarrow-0.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:958572c48d53f79693f30070fd4531f4d9643aa62e03ea1336ea2fc69e9e964d", size = 989528, upload-time = "2026-02-10T03:32:31.632Z" }, - { url = "https://files.pythonhosted.org/packages/e3/b4/1a5f3c10ad667ac9f0dfbde2416124025bdaf963d3915968b1ae6f5f9e85/nanoarrow-0.8.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dc1a1fe64c6b1177314eb4c36d9037268257d6699b052f9462a99e056703f4cb", size = 1159183, upload-time = "2026-02-10T03:32:54.157Z" }, - { url = "https://files.pythonhosted.org/packages/22/28/8c314b5f0bb5c27d1c6164fd8f90d52f02e08defc2d8880466610ecfefdc/nanoarrow-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:feae14c938fe2192f1bea1d0f8e87be9446703d2997bbd555c949df36eed6d32", size = 1031646, upload-time = "2026-02-10T03:32:55.228Z" }, - { url = "https://files.pythonhosted.org/packages/b3/98/3314109e7064f84e25cfc6b7d460177d92dab7eabd149a5b78c1463ad797/nanoarrow-0.8.0-cp310-cp310-win32.whl", hash = "sha256:491a8aedbbbe4dd06660d641762ad9cb9743c39b96259f7795a4ac22cc046f18", size = 566048, upload-time = "2026-02-10T03:35:53.806Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/f1/602c7be675383f725382f4eed0019ba840a8354d2eb732e56e336245182f/nanoarrow-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:1c3b2c6ff46c9cd37350b9855829c0eed1256311e4fea0fcbc8aa9c2080b80ca", size = 658209, upload-time = "2026-02-10T03:33:50.792Z" }, - { url = "https://files.pythonhosted.org/packages/22/89/3ba932b982d26c7f38c1c54cf97dde05ad141045c106b6f1664151c22387/nanoarrow-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:31445b4cb891f77cb0261a0790222c9584c122f6d851e5818bc50a2678ae7bc4", size = 832763, upload-time = "2026-02-10T03:33:41.41Z" }, - { url = "https://files.pythonhosted.org/packages/91/1e/70ff64e9ecbf2744aa7527f721bed8f5e549dabbe1c02ceb6afafa651ba5/nanoarrow-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5a579bd43011d2f5cb5a9ba3a7352bd4e3783f3adedb59b93540af71949433cf", size = 739843, upload-time = "2026-02-10T03:33:30.318Z" }, - { url = "https://files.pythonhosted.org/packages/e7/06/3d88f0fb29b7343426b035f21d90d61c872b83243895e9548d880e08f60a/nanoarrow-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1a910eaae1c60838ea9d11d247aba88cb17c02b67430ec88569a1ae68a7bb25", size = 969064, upload-time = "2026-02-10T03:32:32.669Z" }, - { url = "https://files.pythonhosted.org/packages/a0/aa/e5655fd8d8a6defb0bed22e2de695f974a759798f10775de19f5a924156a/nanoarrow-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8520fe9ab310d855376e4faed742f683390bbab7b5dd230da398cb79f3deb29", size = 1018919, upload-time = "2026-02-10T03:32:56.317Z" }, - { url = "https://files.pythonhosted.org/packages/94/16/db9fedc1d916ba6f66537a992144fb08ddc2495dd5b61a4a2710e5518ec4/nanoarrow-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78a5cbd6f3664e552280688dcae765424587d7392577774f7cd7191f654e71ab", size = 1133563, upload-time = "2026-02-10T03:32:58.884Z" }, - { url = 
"https://files.pythonhosted.org/packages/6e/10/68374d91b1a55f38e4f96ef0f32ed6fd72826aeae6e3c7de45b635937244/nanoarrow-0.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8412b5594cef5e86f35a4a3eb05c25842c38f357926d13610b54dc1d99ffa2df", size = 991138, upload-time = "2026-02-10T03:32:34.182Z" }, - { url = "https://files.pythonhosted.org/packages/4c/eb/ec98442b8b03ce2e9c3150b6ead5c2475253c462ab2b54808be52f6596bd/nanoarrow-0.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d6adfdc1129d3351e6a64e09749c2460270a49eea46a9badff16a15f31104e59", size = 1153814, upload-time = "2026-02-10T03:33:00.013Z" }, - { url = "https://files.pythonhosted.org/packages/c6/74/a3573db8c4b1de39b2ccca439479e408d0b40fd411c501299c3836f43c95/nanoarrow-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebe2b9a7a25b3cc0f86f215e12f67bdfe8925a112ceda86c82d7633fc14fc52d", size = 1032100, upload-time = "2026-02-10T03:33:01.048Z" }, - { url = "https://files.pythonhosted.org/packages/24/29/df629c41d2246fb7d0ad5f191296e5957389774a83f8097357e3073cc0cf/nanoarrow-0.8.0-cp311-cp311-win32.whl", hash = "sha256:196b561557a26295862b181f204790c9fd308bdc78df30247b0e4c0b775b4a48", size = 563662, upload-time = "2026-02-10T03:35:56.048Z" }, - { url = "https://files.pythonhosted.org/packages/17/b8/54001df497f4fdbf7121db2d61090bf9986298a9eba4ed2cbfc9aad414f0/nanoarrow-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:2cc015aa3905c3f0b798570975297730d1428a23768805a23202bc48d0eaabcd", size = 658770, upload-time = "2026-02-10T03:33:51.924Z" }, - { url = "https://files.pythonhosted.org/packages/9d/20/02ef20b340c7f765192309b87685e56c88cda203a4effac04b5d9347626a/nanoarrow-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c5f27749e2b5218e69b484e01f4c08007386e1333fbb110f400354bde0612799", size = 840224, upload-time = "2026-02-10T03:33:42.615Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/84/b1b5d807483f882b7309799d96ec122daaa69890d80c2994f476d4e07c51/nanoarrow-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c84efa8efba413a1cecee7d10d9e5dfbf7651026538449c5d554c1af19932791", size = 732615, upload-time = "2026-02-10T03:33:31.79Z" }, - { url = "https://files.pythonhosted.org/packages/bc/9e/51a6b437cf173728e03e16e32aee865b36f2043478f4e2688ea2187f63ad/nanoarrow-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0548987b4d32536768e092478e3fe8968f35f9624354e30efa622e32c5d87944", size = 955080, upload-time = "2026-02-10T03:32:35.741Z" }, - { url = "https://files.pythonhosted.org/packages/c8/12/9fed89e0d76ad8c376fe74d12b7e1a7cbcb75ff8ebb242264a1d980f5529/nanoarrow-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae9d43a358cd6f13e9569c010de36e7e3e98b7da059bdf83438d5e7ce2f77f4", size = 1009196, upload-time = "2026-02-10T03:33:02.057Z" }, - { url = "https://files.pythonhosted.org/packages/9f/1a/eb1a7036f2dbb30748eda66d479319cfe165eea6e6748c94488c484be7f4/nanoarrow-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc83b0b5636a3e558588c0eb6e3c32e295d0296909a08f3b4af17c81a2db8bf6", size = 1119470, upload-time = "2026-02-10T03:33:03.696Z" }, - { url = "https://files.pythonhosted.org/packages/9f/c4/d2178bccb12aaeef5843c90e671faf1a6247bdb8b4d64454fc471e97eb71/nanoarrow-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:29e0783f9ff2b802cd883a41f5cc009f70efea519efcc710db7d143819d1d315", size = 979664, upload-time = "2026-02-10T03:32:37.594Z" }, - { url = "https://files.pythonhosted.org/packages/7e/34/f52319f9304659a5ed5db125b635316ce6d042767cde257fcf9c6a7f80e1/nanoarrow-0.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:20d07a0ac666e9563e132a2097de5e9fa26b4781c0f8edfbdce0da866c22faba", size = 1144197, upload-time = "2026-02-10T03:33:04.824Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/45/3b56702078b7515ff9b74b215ea983358df11140a6c3b7056f55551828da/nanoarrow-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3706258dc89231ef27dee50a26d53ec96dba85dbafa8d6637276bd212be4bc1b", size = 1026594, upload-time = "2026-02-10T03:33:06.055Z" }, - { url = "https://files.pythonhosted.org/packages/d7/f6/fe382bf2770a7e522f132e5310350fb0aecc3023f876d02265a7f40c7c79/nanoarrow-0.8.0-cp312-cp312-win32.whl", hash = "sha256:6ab8bd2db59388c6bd131c4d9e2649a6626ffe7434084cee6c22fdfbedfeda1b", size = 569212, upload-time = "2026-02-10T03:35:57.767Z" }, - { url = "https://files.pythonhosted.org/packages/c5/38/589e3c41490a742c639221eea655cf5d0a5972242efab8040a0c904a7dba/nanoarrow-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:22c3443ebc0b988dff06cb88d03cf9accbf85fdde905fb7d76b6e001561855a8", size = 645746, upload-time = "2026-02-10T03:33:53.147Z" }, - { url = "https://files.pythonhosted.org/packages/8e/af/b7df299b87348d396d049ef9fab6bef76d29c63288e5b54f752b97f7b3df/nanoarrow-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:605c7af9130599c40264d14c295dcc2a779402183c13f4189e7475b9dc52613a", size = 838886, upload-time = "2026-02-10T03:33:43.624Z" }, - { url = "https://files.pythonhosted.org/packages/07/ec/02fd6979c35e347e6d5cf57757616a6d599d4ac6808bf0a37ca334639d07/nanoarrow-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:64d49118b5477bef8af5fba0b66ad032e1f9861f70d210c262b5393e5b62f47d", size = 730110, upload-time = "2026-02-10T03:33:32.771Z" }, - { url = "https://files.pythonhosted.org/packages/d2/04/64beb88b036a9d20d0f8be0846d9db7912c3332f3969ecd66144a4fd2021/nanoarrow-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1b64aa3739efbe85e775ba5733e37713521386d3014c866f9065815b7387114", size = 951234, upload-time = "2026-02-10T03:32:39.119Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/3d/1850ef02a632fa5d65319c1155c326982896828ffbfd88c8fc44ee1a23aa/nanoarrow-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4a363e697b3e852fd1f374840df22aaac0323fb8d0ab24a50c3ea1090b4594", size = 1005525, upload-time = "2026-02-10T03:33:07.588Z" }, - { url = "https://files.pythonhosted.org/packages/94/4b/3c671773e6dcce1784b4e42d0e5f5942fee49f6ddf7ae2567d36b3b4248e/nanoarrow-0.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:381a2a65b0bcfe36b267d506072a3a5c83b8326dfbb50dff2d7f55ac25159f69", size = 1120370, upload-time = "2026-02-10T03:33:08.715Z" }, - { url = "https://files.pythonhosted.org/packages/4a/79/bc49e7518ba9e5b377ca3670ceba5949cb3e20363ba7f091df62d84c4edd/nanoarrow-0.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cbca73fcb5c2c753ddac3695774e47cbed3b3bc64dba34866f3056e33a2a0ac2", size = 977504, upload-time = "2026-02-10T03:32:40.349Z" }, - { url = "https://files.pythonhosted.org/packages/9f/cb/bb57665133351b042b4c25d549b21fc9bb9f56a3c5f4e5d561c41f5d705c/nanoarrow-0.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a4539c5767723cf0c0a21b401acc7d706ca7fd84302b6be514eeb5b8ee230903", size = 1141114, upload-time = "2026-02-10T03:33:10.575Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a0/2792c5e160d56b5abe782228a963ae3d7477727bf950f6b990ebcfed8f49/nanoarrow-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:be0899058f66d3b7e4e4b7cfe125e95625e109b4513a81fd9bc098abef55a381", size = 1025080, upload-time = "2026-02-10T03:33:11.661Z" }, - { url = "https://files.pythonhosted.org/packages/8e/45/5209dad8a3e4f460ca7d7d314ff34ef6426ced873655df1a469b0f91e01d/nanoarrow-0.8.0-cp313-cp313-win32.whl", hash = "sha256:7c227e1e68926b0ccde7336211dd7a11f8983098b3698ee548756bdb778b016d", size = 568315, upload-time = "2026-02-10T03:35:58.998Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/41/b2ad2b541b94422e4091a96192deb5c98d5a6b4c44ade37f5bd6d3efd83f/nanoarrow-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:1730cb374739459a925c590c32e07e213c9c6ddd2e12f44547e2bd70d29a7a9b", size = 644676, upload-time = "2026-02-10T03:33:54.301Z" }, - { url = "https://files.pythonhosted.org/packages/87/7a/5e2d1005f98cca18ebb289cffbb55fe0895465349affbe4cfb1321de9ad0/nanoarrow-0.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:dfa96964d2ccd762a5cb8e28eb0439b6c05b4f5090c4ca2d0207c32d8093cda5", size = 863391, upload-time = "2026-02-10T03:33:44.864Z" }, - { url = "https://files.pythonhosted.org/packages/5e/63/e45fd81a0a35bc782161801e2bec03794184504eedc7760fa79b33e333ca/nanoarrow-0.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:714b21daefe495d7cdd6dad34d3409ae42a77f4ef6bf648f4688d0abef8924c1", size = 779228, upload-time = "2026-02-10T03:33:33.9Z" }, - { url = "https://files.pythonhosted.org/packages/3e/a0/f8173511a74b48d2c3b88f7a337faaca8c01b3255a53b065db171e63fa85/nanoarrow-0.8.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777298c348b268b3490504d9ba099dc6eede702bb9f337360dec6412944a187", size = 967376, upload-time = "2026-02-10T03:32:41.885Z" }, - { url = "https://files.pythonhosted.org/packages/f7/cf/4c885fb3a605a17607cfd8cc9f7b23aba19f9826c3bfe4dcf300b0a8e48c/nanoarrow-0.8.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e31ee3e3b1e94eccc4cc034f02956ecd15b4ae33ae8a1f999704871ea3b6dec", size = 1014554, upload-time = "2026-02-10T03:33:12.916Z" }, - { url = "https://files.pythonhosted.org/packages/43/07/190f7b4746b0d691dbea0f4c36c34012d916d3579af7ae83254a1d9f6f26/nanoarrow-0.8.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bda72e24dd8e2abb3f445f129a422809d788db9cfbbfd247c32f5620e03128c", size = 1115168, upload-time = "2026-02-10T03:33:14.533Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/58/abd834fc30abcb053642e5935911be9a442c6c5d48c7c6f855c8de2f329d/nanoarrow-0.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d2ee1a27a7210c8eba6ac6e8ab70b598da238348b125b53b16d9e1ae0313addc", size = 984855, upload-time = "2026-02-10T03:32:43.422Z" }, - { url = "https://files.pythonhosted.org/packages/18/62/ca4977054d7267ce3756409425b82fe1ea916871555f2512872ec8f7e0d4/nanoarrow-0.8.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:91466de52617b25dff7349dbf18cc612ce5ec35d09f025b37ea60be819808be8", size = 1122634, upload-time = "2026-02-10T03:33:15.699Z" }, - { url = "https://files.pythonhosted.org/packages/16/b3/75b71c46a3950b06ae3f63cb426ba92a9ebfe2aaa216845c8a4cc56b1bb7/nanoarrow-0.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4204e2b5f9cf895bcecfe432b03c346ec2bdadfda0174c8ab195acc6b4794986", size = 1022700, upload-time = "2026-02-10T03:33:16.802Z" }, - { url = "https://files.pythonhosted.org/packages/89/12/3a3337b17de7c3c3ff1bfc09a01c75d8f463e40e6850c8f5e42d4240c9a7/nanoarrow-0.8.0-cp313-cp313t-win32.whl", hash = "sha256:1fdc0c2508b53a83c9814fdcd2d4bac6d98ea989fb363e0d88d329a8cddd7d50", size = 624159, upload-time = "2026-02-10T03:36:00.135Z" }, - { url = "https://files.pythonhosted.org/packages/2c/a8/80c9ed4718e253e7f19320fcd69ca8c7c9ed87d32848d3da97afee3d8b6b/nanoarrow-0.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b73748e0f39cd8dc1ce33eaad3215f2aff6aebb03e659c26d2a8df9277e7e509", size = 712076, upload-time = "2026-02-10T03:33:55.345Z" }, - { url = "https://files.pythonhosted.org/packages/a4/6f/167cbe632266e8e84d8965262a5e3121e073f593140701bc9be06062f8da/nanoarrow-0.8.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:51e9609efad27191e6506b9c224c90ae49a0c72f641c8094f168d4694b45a3ff", size = 775361, upload-time = "2026-02-10T03:33:47.176Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/94/762f77b6b0fa7a6787316af297a239b59b1f36e37122b0770ff3cfe61e3d/nanoarrow-0.8.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:185726c467211592ba47933949cb62bc6e1797eefdd760a145b241c44377fba9", size = 692089, upload-time = "2026-02-10T03:33:36.46Z" }, - { url = "https://files.pythonhosted.org/packages/27/c3/75ac260a7e5cd00b72c35248897bc6f899d4e65457141160978ce6258601/nanoarrow-0.8.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c11edd20949a874afb0e50f08402ea3f5c5206d70ec7ed2c27d8064a36222038", size = 903435, upload-time = "2026-02-10T03:32:47.413Z" }, - { url = "https://files.pythonhosted.org/packages/70/ce/26d6673123afe22ad04b68ca90f800133f75c55792355959037e81ddc8a2/nanoarrow-0.8.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c2b31fdab6b5fb3d3c10f7597e16698c9d3db1bac4c645341e6e36320b78642", size = 948741, upload-time = "2026-02-10T03:33:22.331Z" }, - { url = "https://files.pythonhosted.org/packages/9a/ad/f3b7b205ff1a2e755dcc90e7df4ede0f2a7eb6d217f2ab626ef2b00ee0e3/nanoarrow-0.8.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df0f118c7ba1036adf032d909674cb925a37ceeed83756c43d27ff9ad225b9e1", size = 1055379, upload-time = "2026-02-10T03:33:23.481Z" }, - { url = "https://files.pythonhosted.org/packages/9b/13/623183d5df76a4e3835af9e42a6d63dcc46d3d3e22d846d48b4458cf5cfb/nanoarrow-0.8.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:827b3e3f8ba81c3b2a9de72dd6cddd74afc7e4cf03aacb0b7f6f2ac06747ae88", size = 591477, upload-time = "2026-02-10T03:33:57.652Z" }, - { url = "https://files.pythonhosted.org/packages/96/97/6265c84c3c865d2fc1fd56954c60a9386e03ab9c9db11c5f2d57fafa1077/nanoarrow-0.8.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a765f955a6bfb989af1d37fa3d0c4f89c242fe12088b5e787748f995a5fa13fc", size = 775344, upload-time = "2026-02-10T03:33:48.237Z" }, 
- { url = "https://files.pythonhosted.org/packages/cd/f2/daaf03224b88cb66b1a6a19da371386f875e95208a42c73b109f1d273166/nanoarrow-0.8.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:b2023fa0d5d6992fd8a5a04452c559817c9805aea7391fa46291aaf381a6aa19", size = 692002, upload-time = "2026-02-10T03:33:37.991Z" }, - { url = "https://files.pythonhosted.org/packages/55/53/c058976db13e18106737a1fddf192e45022375628a38c2caaa51a9934ada/nanoarrow-0.8.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96c7d723b5615e2e9c5f7ba7b5af8d80ba90ecf9871ba005941ac80355ef556a", size = 904133, upload-time = "2026-02-10T03:32:49.155Z" }, - { url = "https://files.pythonhosted.org/packages/d5/3f/002a228af17ecba07ca9ff47628e97c73e336a72fd18ad5d78534a6497d8/nanoarrow-0.8.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c22b03d1ceca21aace2c8053ed43cac5566e69dd1660708783fe0e84dd35693e", size = 949714, upload-time = "2026-02-10T03:33:24.542Z" }, - { url = "https://files.pythonhosted.org/packages/28/5e/3bad2cfeb03d0682b93f13640ede98eb59cf15b4d868d5c9745118f59eb2/nanoarrow-0.8.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:999f906c297203b5430dc4e79e662301f5ab02a793b6fc67973ee3c0518fb936", size = 1056467, upload-time = "2026-02-10T03:33:25.617Z" }, - { url = "https://files.pythonhosted.org/packages/b9/e5/c740ea047b5ada76175327360d0406ae283159cb1745cbcb51443d90d53b/nanoarrow-0.8.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8c5529abc4e75b7764ffc6d2fbabd0c676f75ca2ece71a8671c4724207cfb697", size = 591889, upload-time = "2026-02-10T03:33:58.891Z" }, -] - -[[package]] -name = "nodeenv" -version = "1.9.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = 
"sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, -] - -[[package]] -name = "numpy" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and python_full_version < '3.14'", - "python_full_version == '3.11.*'", - "python_full_version < '3.11'", -] -sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/fdbf6a7871703df6160b5cf3dd774074b086d278172285c52c2758b76305/numpy-2.2.1.tar.gz", hash = "sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918", size = 20227662, upload-time = "2024-12-21T22:49:36.523Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/c4/5588367dc9f91e1a813beb77de46ea8cab13f778e1b3a0e661ab031aba44/numpy-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5edb4e4caf751c1518e6a26a83501fda79bff41cc59dac48d70e6d65d4ec4440", size = 21213214, upload-time = "2024-12-21T20:29:57.832Z" }, - { url = "https://files.pythonhosted.org/packages/d8/8b/32dd9f08419023a4cf856c5ad0b4eba9b830da85eafdef841a104c4fc05a/numpy-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa3017c40d513ccac9621a2364f939d39e550c542eb2a894b4c8da92b38896ab", size = 14352248, upload-time = "2024-12-21T20:30:32.954Z" }, - { url = "https://files.pythonhosted.org/packages/84/2d/0e895d02940ba6e12389f0ab5cac5afcf8dc2dc0ade4e8cad33288a721bd/numpy-2.2.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:61048b4a49b1c93fe13426e04e04fdf5a03f456616f6e98c7576144677598675", size = 5391007, upload-time = "2024-12-21T20:30:46.067Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/b9/7f1e64a0d46d9c2af6d17966f641fb12d5b8ea3003f31b2308f3e3b9a6aa/numpy-2.2.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7671dc19c7019103ca44e8d94917eba8534c76133523ca8406822efdd19c9308", size = 6926174, upload-time = "2024-12-21T20:31:07.682Z" }, - { url = "https://files.pythonhosted.org/packages/2e/8c/043fa4418bc9364e364ab7aba8ff6ef5f6b9171ade22de8fbcf0e2fa4165/numpy-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4250888bcb96617e00bfa28ac24850a83c9f3a16db471eca2ee1f1714df0f957", size = 14330914, upload-time = "2024-12-21T20:31:31.641Z" }, - { url = "https://files.pythonhosted.org/packages/f7/b6/d8110985501ca8912dfc1c3bbef99d66e62d487f72e46b2337494df77364/numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7746f235c47abc72b102d3bce9977714c2444bdfaea7888d241b4c4bb6a78bf", size = 16379607, upload-time = "2024-12-21T20:32:06.43Z" }, - { url = "https://files.pythonhosted.org/packages/e2/57/bdca9fb8bdaa810c3a4ff2eb3231379b77f618a7c0d24be9f7070db50775/numpy-2.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:059e6a747ae84fce488c3ee397cee7e5f905fd1bda5fb18c66bc41807ff119b2", size = 15541760, upload-time = "2024-12-21T20:32:46.421Z" }, - { url = "https://files.pythonhosted.org/packages/97/55/3b9147b3cbc3b6b1abc2a411dec5337a46c873deca0dd0bf5bef9d0579cc/numpy-2.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f62aa6ee4eb43b024b0e5a01cf65a0bb078ef8c395e8713c6e8a12a697144528", size = 18168476, upload-time = "2024-12-21T22:25:15.062Z" }, - { url = "https://files.pythonhosted.org/packages/00/e7/7c2cde16c9b87a8e14fdd262ca7849c4681cf48c8a774505f7e6f5e3b643/numpy-2.2.1-cp310-cp310-win32.whl", hash = "sha256:48fd472630715e1c1c89bf1feab55c29098cb403cc184b4859f9c86d4fcb6a95", size = 6570985, upload-time = "2024-12-21T22:25:31.2Z" }, - { url = 
"https://files.pythonhosted.org/packages/a1/a8/554b0e99fc4ac11ec481254781a10da180d0559c2ebf2c324232317349ee/numpy-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:b541032178a718c165a49638d28272b771053f628382d5e9d1c93df23ff58dbf", size = 12913384, upload-time = "2024-12-21T22:25:54.717Z" }, - { url = "https://files.pythonhosted.org/packages/59/14/645887347124e101d983e1daf95b48dc3e136bf8525cb4257bf9eab1b768/numpy-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40f9e544c1c56ba8f1cf7686a8c9b5bb249e665d40d626a23899ba6d5d9e1484", size = 21217379, upload-time = "2024-12-21T22:26:52.153Z" }, - { url = "https://files.pythonhosted.org/packages/9f/fd/2279000cf29f58ccfd3778cbf4670dfe3f7ce772df5e198c5abe9e88b7d7/numpy-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9b57eaa3b0cd8db52049ed0330747b0364e899e8a606a624813452b8203d5f7", size = 14388520, upload-time = "2024-12-21T22:27:29.302Z" }, - { url = "https://files.pythonhosted.org/packages/58/b0/034eb5d5ba12d66ab658ff3455a31f20add0b78df8203c6a7451bd1bee21/numpy-2.2.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bc8a37ad5b22c08e2dbd27df2b3ef7e5c0864235805b1e718a235bcb200cf1cb", size = 5389286, upload-time = "2024-12-21T22:27:42.369Z" }, - { url = "https://files.pythonhosted.org/packages/5d/69/6f3cccde92e82e7835fdb475c2bf439761cbf8a1daa7c07338e1e132dfec/numpy-2.2.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9036d6365d13b6cbe8f27a0eaf73ddcc070cae584e5ff94bb45e3e9d729feab5", size = 6930345, upload-time = "2024-12-21T22:28:02.349Z" }, - { url = "https://files.pythonhosted.org/packages/d1/72/1cd38e91ab563e67f584293fcc6aca855c9ae46dba42e6b5ff4600022899/numpy-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51faf345324db860b515d3f364eaa93d0e0551a88d6218a7d61286554d190d73", size = 14335748, upload-time = "2024-12-21T22:28:33.546Z" }, - { url = 
"https://files.pythonhosted.org/packages/f2/d4/f999444e86986f3533e7151c272bd8186c55dda554284def18557e013a2a/numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38efc1e56b73cc9b182fe55e56e63b044dd26a72128fd2fbd502f75555d92591", size = 16391057, upload-time = "2024-12-21T22:29:06.549Z" }, - { url = "https://files.pythonhosted.org/packages/99/7b/85cef6a3ae1b19542b7afd97d0b296526b6ef9e3c43ea0c4d9c4404fb2d0/numpy-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:31b89fa67a8042e96715c68e071a1200c4e172f93b0fbe01a14c0ff3ff820fc8", size = 15556943, upload-time = "2024-12-21T22:30:03.919Z" }, - { url = "https://files.pythonhosted.org/packages/69/7e/b83cc884c3508e91af78760f6b17ab46ad649831b1fa35acb3eb26d9e6d2/numpy-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c86e2a209199ead7ee0af65e1d9992d1dce7e1f63c4b9a616500f93820658d0", size = 18180785, upload-time = "2024-12-21T22:30:41.924Z" }, - { url = "https://files.pythonhosted.org/packages/b2/9f/eb4a9a38867de059dcd4b6e18d47c3867fbd3795d4c9557bb49278f94087/numpy-2.2.1-cp311-cp311-win32.whl", hash = "sha256:b34d87e8a3090ea626003f87f9392b3929a7bbf4104a05b6667348b6bd4bf1cd", size = 6568983, upload-time = "2024-12-21T22:30:56.619Z" }, - { url = "https://files.pythonhosted.org/packages/6d/1e/be3b9f3073da2f8c7fa361fcdc231b548266b0781029fdbaf75eeab997fd/numpy-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:360137f8fb1b753c5cde3ac388597ad680eccbbbb3865ab65efea062c4a1fd16", size = 12917260, upload-time = "2024-12-21T22:31:22.151Z" }, - { url = "https://files.pythonhosted.org/packages/62/12/b928871c570d4a87ab13d2cc19f8817f17e340d5481621930e76b80ffb7d/numpy-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:694f9e921a0c8f252980e85bce61ebbd07ed2b7d4fa72d0e4246f2f8aa6642ab", size = 20909861, upload-time = "2024-12-21T22:32:05.145Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/c3/59df91ae1d8ad7c5e03efd63fd785dec62d96b0fe56d1f9ab600b55009af/numpy-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3683a8d166f2692664262fd4900f207791d005fb088d7fdb973cc8d663626faa", size = 14095776, upload-time = "2024-12-21T22:32:37.312Z" }, - { url = "https://files.pythonhosted.org/packages/af/4e/8ed5868efc8e601fb69419644a280e9c482b75691466b73bfaab7d86922c/numpy-2.2.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:780077d95eafc2ccc3ced969db22377b3864e5b9a0ea5eb347cc93b3ea900315", size = 5126239, upload-time = "2024-12-21T22:32:59.288Z" }, - { url = "https://files.pythonhosted.org/packages/1a/74/dd0bbe650d7bc0014b051f092f2de65e34a8155aabb1287698919d124d7f/numpy-2.2.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:55ba24ebe208344aa7a00e4482f65742969a039c2acfcb910bc6fcd776eb4355", size = 6659296, upload-time = "2024-12-21T22:33:11.456Z" }, - { url = "https://files.pythonhosted.org/packages/7f/11/4ebd7a3f4a655764dc98481f97bd0a662fb340d1001be6050606be13e162/numpy-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b1d07b53b78bf84a96898c1bc139ad7f10fda7423f5fd158fd0f47ec5e01ac7", size = 14047121, upload-time = "2024-12-21T22:33:47.216Z" }, - { url = "https://files.pythonhosted.org/packages/7f/a7/c1f1d978166eb6b98ad009503e4d93a8c1962d0eb14a885c352ee0276a54/numpy-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5062dc1a4e32a10dc2b8b13cedd58988261416e811c1dc4dbdea4f57eea61b0d", size = 16096599, upload-time = "2024-12-21T22:34:27.868Z" }, - { url = "https://files.pythonhosted.org/packages/3d/6d/0e22afd5fcbb4d8d0091f3f46bf4e8906399c458d4293da23292c0ba5022/numpy-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fce4f615f8ca31b2e61aa0eb5865a21e14f5629515c9151850aa936c02a1ee51", size = 15243932, upload-time = "2024-12-21T22:35:05.318Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/39/e4e5832820131ba424092b9610d996b37e5557180f8e2d6aebb05c31ae54/numpy-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:67d4cda6fa6ffa073b08c8372aa5fa767ceb10c9a0587c707505a6d426f4e046", size = 17861032, upload-time = "2024-12-21T22:35:37.77Z" }, - { url = "https://files.pythonhosted.org/packages/5f/8a/3794313acbf5e70df2d5c7d2aba8718676f8d054a05abe59e48417fb2981/numpy-2.2.1-cp312-cp312-win32.whl", hash = "sha256:32cb94448be47c500d2c7a95f93e2f21a01f1fd05dd2beea1ccd049bb6001cd2", size = 6274018, upload-time = "2024-12-21T22:35:51.117Z" }, - { url = "https://files.pythonhosted.org/packages/17/c1/c31d3637f2641e25c7a19adf2ae822fdaf4ddd198b05d79a92a9ce7cb63e/numpy-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:ba5511d8f31c033a5fcbda22dd5c813630af98c70b2661f2d2c654ae3cdfcfc8", size = 12613843, upload-time = "2024-12-21T22:36:22.816Z" }, - { url = "https://files.pythonhosted.org/packages/20/d6/91a26e671c396e0c10e327b763485ee295f5a5a7a48c553f18417e5a0ed5/numpy-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780", size = 20896464, upload-time = "2024-12-21T22:37:01.393Z" }, - { url = "https://files.pythonhosted.org/packages/8c/40/5792ccccd91d45e87d9e00033abc4f6ca8a828467b193f711139ff1f1cd9/numpy-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821", size = 14111350, upload-time = "2024-12-21T22:37:35.152Z" }, - { url = "https://files.pythonhosted.org/packages/c0/2a/fb0a27f846cb857cef0c4c92bef89f133a3a1abb4e16bba1c4dace2e9b49/numpy-2.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e", size = 5111629, upload-time = "2024-12-21T22:37:51.291Z" }, - { url = "https://files.pythonhosted.org/packages/eb/e5/8e81bb9d84db88b047baf4e8b681a3e48d6390bc4d4e4453eca428ecbb49/numpy-2.2.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = 
"sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348", size = 6645865, upload-time = "2024-12-21T22:38:03.738Z" }, - { url = "https://files.pythonhosted.org/packages/7a/1a/a90ceb191dd2f9e2897c69dde93ccc2d57dd21ce2acbd7b0333e8eea4e8d/numpy-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59", size = 14043508, upload-time = "2024-12-21T22:38:41.854Z" }, - { url = "https://files.pythonhosted.org/packages/f1/5a/e572284c86a59dec0871a49cd4e5351e20b9c751399d5f1d79628c0542cb/numpy-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af", size = 16094100, upload-time = "2024-12-21T22:39:12.904Z" }, - { url = "https://files.pythonhosted.org/packages/0c/2c/a79d24f364788386d85899dd280a94f30b0950be4b4a545f4fa4ed1d4ca7/numpy-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51", size = 15239691, upload-time = "2024-12-21T22:39:48.32Z" }, - { url = "https://files.pythonhosted.org/packages/cf/79/1e20fd1c9ce5a932111f964b544facc5bb9bde7865f5b42f00b4a6a9192b/numpy-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716", size = 17856571, upload-time = "2024-12-21T22:40:22.575Z" }, - { url = "https://files.pythonhosted.org/packages/be/5b/cc155e107f75d694f562bdc84a26cc930569f3dfdfbccb3420b626065777/numpy-2.2.1-cp313-cp313-win32.whl", hash = "sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e", size = 6270841, upload-time = "2024-12-21T22:45:15.101Z" }, - { url = "https://files.pythonhosted.org/packages/44/be/0e5cd009d2162e4138d79a5afb3b5d2341f0fe4777ab6e675aa3d4a42e21/numpy-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60", size = 12606618, upload-time = 
"2024-12-21T22:45:47.227Z" }, - { url = "https://files.pythonhosted.org/packages/a8/87/04ddf02dd86fb17c7485a5f87b605c4437966d53de1e3745d450343a6f56/numpy-2.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e", size = 20921004, upload-time = "2024-12-21T22:40:58.532Z" }, - { url = "https://files.pythonhosted.org/packages/6e/3e/d0e9e32ab14005425d180ef950badf31b862f3839c5b927796648b11f88a/numpy-2.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712", size = 14119910, upload-time = "2024-12-21T22:41:41.298Z" }, - { url = "https://files.pythonhosted.org/packages/b5/5b/aa2d1905b04a8fb681e08742bb79a7bddfc160c7ce8e1ff6d5c821be0236/numpy-2.2.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008", size = 5153612, upload-time = "2024-12-21T22:41:52.23Z" }, - { url = "https://files.pythonhosted.org/packages/ce/35/6831808028df0648d9b43c5df7e1051129aa0d562525bacb70019c5f5030/numpy-2.2.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84", size = 6668401, upload-time = "2024-12-21T22:42:05.378Z" }, - { url = "https://files.pythonhosted.org/packages/b1/38/10ef509ad63a5946cc042f98d838daebfe7eaf45b9daaf13df2086b15ff9/numpy-2.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631", size = 14014198, upload-time = "2024-12-21T22:42:36.414Z" }, - { url = "https://files.pythonhosted.org/packages/df/f8/c80968ae01df23e249ee0a4487fae55a4c0fe2f838dfe9cc907aa8aea0fa/numpy-2.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d", size = 16076211, upload-time = "2024-12-21T22:43:10.125Z" }, - { url = 
"https://files.pythonhosted.org/packages/09/69/05c169376016a0b614b432967ac46ff14269eaffab80040ec03ae1ae8e2c/numpy-2.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5", size = 15220266, upload-time = "2024-12-21T22:43:44.16Z" }, - { url = "https://files.pythonhosted.org/packages/f1/ff/94a4ce67ea909f41cf7ea712aebbe832dc67decad22944a1020bb398a5ee/numpy-2.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71", size = 17852844, upload-time = "2024-12-21T22:44:19.029Z" }, - { url = "https://files.pythonhosted.org/packages/46/72/8a5dbce4020dfc595592333ef2fbb0a187d084ca243b67766d29d03e0096/numpy-2.2.1-cp313-cp313t-win32.whl", hash = "sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2", size = 6326007, upload-time = "2024-12-21T22:44:34.097Z" }, - { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107, upload-time = "2024-12-21T22:44:57.542Z" }, - { url = "https://files.pythonhosted.org/packages/f1/65/d36a76b811ffe0a4515e290cb05cb0e22171b1b0f0db6bee9141cf023545/numpy-2.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ba9cc93a91d86365a5d270dee221fdc04fb68d7478e6bf6af650de78a8339e3", size = 21044672, upload-time = "2024-12-21T22:46:49.317Z" }, - { url = "https://files.pythonhosted.org/packages/aa/3f/b644199f165063154df486d95198d814578f13dd4d8c1651e075bf1cb8af/numpy-2.2.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3d03883435a19794e41f147612a77a8f56d4e52822337844fff3d4040a142964", size = 6789873, upload-time = "2024-12-21T22:47:10.519Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/df/2adb0bb98a3cbe8a6c3c6d1019aede1f1d8b83927ced228a46cc56c7a206/numpy-2.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4511d9e6071452b944207c8ce46ad2f897307910b402ea5fa975da32e0102800", size = 16194933, upload-time = "2024-12-21T22:47:47.113Z" }, - { url = "https://files.pythonhosted.org/packages/13/3e/1959d5219a9e6d200638d924cedda6a606392f7186a4ed56478252e70d55/numpy-2.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5c5cc0cbabe9452038ed984d05ac87910f89370b9242371bd9079cb4af61811e", size = 12820057, upload-time = "2024-12-21T22:48:36.421Z" }, -] - -[[package]] -name = "numpy" -version = "2.3.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/e7/0e07379944aa8afb49a556a2b54587b828eb41dc9adc56fb7615b678ca53/numpy-2.3.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e78aecd2800b32e8347ce49316d3eaf04aed849cd5b38e0af39f829a4e59f5eb", size = 21259519, upload-time = "2025-10-15T16:15:19.012Z" }, - { url = "https://files.pythonhosted.org/packages/d0/cb/5a69293561e8819b09e34ed9e873b9a82b5f2ade23dce4c51dc507f6cfe1/numpy-2.3.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd09cc5d65bda1e79432859c40978010622112e9194e581e3415a3eccc7f43f", size = 14452796, upload-time = "2025-10-15T16:15:23.094Z" }, - { url = "https://files.pythonhosted.org/packages/e4/04/ff11611200acd602a1e5129e36cfd25bf01ad8e5cf927baf2e90236eb02e/numpy-2.3.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1b219560ae2c1de48ead517d085bc2d05b9433f8e49d0955c82e8cd37bd7bf36", size = 5381639, upload-time 
= "2025-10-15T16:15:25.572Z" }, - { url = "https://files.pythonhosted.org/packages/ea/77/e95c757a6fe7a48d28a009267408e8aa382630cc1ad1db7451b3bc21dbb4/numpy-2.3.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:bafa7d87d4c99752d07815ed7a2c0964f8ab311eb8168f41b910bd01d15b6032", size = 6914296, upload-time = "2025-10-15T16:15:27.079Z" }, - { url = "https://files.pythonhosted.org/packages/a3/d2/137c7b6841c942124eae921279e5c41b1c34bab0e6fc60c7348e69afd165/numpy-2.3.4-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36dc13af226aeab72b7abad501d370d606326a0029b9f435eacb3b8c94b8a8b7", size = 14591904, upload-time = "2025-10-15T16:15:29.044Z" }, - { url = "https://files.pythonhosted.org/packages/bb/32/67e3b0f07b0aba57a078c4ab777a9e8e6bc62f24fb53a2337f75f9691699/numpy-2.3.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7b2f9a18b5ff9824a6af80de4f37f4ec3c2aab05ef08f51c77a093f5b89adda", size = 16939602, upload-time = "2025-10-15T16:15:31.106Z" }, - { url = "https://files.pythonhosted.org/packages/95/22/9639c30e32c93c4cee3ccdb4b09c2d0fbff4dcd06d36b357da06146530fb/numpy-2.3.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9984bd645a8db6ca15d850ff996856d8762c51a2239225288f08f9050ca240a0", size = 16372661, upload-time = "2025-10-15T16:15:33.546Z" }, - { url = "https://files.pythonhosted.org/packages/12/e9/a685079529be2b0156ae0c11b13d6be647743095bb51d46589e95be88086/numpy-2.3.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:64c5825affc76942973a70acf438a8ab618dbd692b84cd5ec40a0a0509edc09a", size = 18884682, upload-time = "2025-10-15T16:15:36.105Z" }, - { url = "https://files.pythonhosted.org/packages/cf/85/f6f00d019b0cc741e64b4e00ce865a57b6bed945d1bbeb1ccadbc647959b/numpy-2.3.4-cp311-cp311-win32.whl", hash = "sha256:ed759bf7a70342f7817d88376eb7142fab9fef8320d6019ef87fae05a99874e1", size = 6570076, upload-time = "2025-10-15T16:15:38.225Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/10/f8850982021cb90e2ec31990291f9e830ce7d94eef432b15066e7cbe0bec/numpy-2.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:faba246fb30ea2a526c2e9645f61612341de1a83fb1e0c5edf4ddda5a9c10996", size = 13089358, upload-time = "2025-10-15T16:15:40.404Z" }, - { url = "https://files.pythonhosted.org/packages/d1/ad/afdd8351385edf0b3445f9e24210a9c3971ef4de8fd85155462fc4321d79/numpy-2.3.4-cp311-cp311-win_arm64.whl", hash = "sha256:4c01835e718bcebe80394fd0ac66c07cbb90147ebbdad3dcecd3f25de2ae7e2c", size = 10462292, upload-time = "2025-10-15T16:15:42.896Z" }, - { url = "https://files.pythonhosted.org/packages/96/7a/02420400b736f84317e759291b8edaeee9dc921f72b045475a9cbdb26b17/numpy-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ef1b5a3e808bc40827b5fa2c8196151a4c5abe110e1726949d7abddfe5c7ae11", size = 20957727, upload-time = "2025-10-15T16:15:44.9Z" }, - { url = "https://files.pythonhosted.org/packages/18/90/a014805d627aa5750f6f0e878172afb6454552da929144b3c07fcae1bb13/numpy-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2f91f496a87235c6aaf6d3f3d89b17dba64996abadccb289f48456cff931ca9", size = 14187262, upload-time = "2025-10-15T16:15:47.761Z" }, - { url = "https://files.pythonhosted.org/packages/c7/e4/0a94b09abe89e500dc748e7515f21a13e30c5c3fe3396e6d4ac108c25fca/numpy-2.3.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f77e5b3d3da652b474cc80a14084927a5e86a5eccf54ca8ca5cbd697bf7f2667", size = 5115992, upload-time = "2025-10-15T16:15:50.144Z" }, - { url = "https://files.pythonhosted.org/packages/88/dd/db77c75b055c6157cbd4f9c92c4458daef0dd9cbe6d8d2fe7f803cb64c37/numpy-2.3.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ab1c5f5ee40d6e01cbe96de5863e39b215a4d24e7d007cad56c7184fdf4aeef", size = 6648672, upload-time = "2025-10-15T16:15:52.442Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/e6/e31b0d713719610e406c0ea3ae0d90760465b086da8783e2fd835ad59027/numpy-2.3.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77b84453f3adcb994ddbd0d1c5d11db2d6bda1a2b7fd5ac5bd4649d6f5dc682e", size = 14284156, upload-time = "2025-10-15T16:15:54.351Z" }, - { url = "https://files.pythonhosted.org/packages/f9/58/30a85127bfee6f108282107caf8e06a1f0cc997cb6b52cdee699276fcce4/numpy-2.3.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4121c5beb58a7f9e6dfdee612cb24f4df5cd4db6e8261d7f4d7450a997a65d6a", size = 16641271, upload-time = "2025-10-15T16:15:56.67Z" }, - { url = "https://files.pythonhosted.org/packages/06/f2/2e06a0f2adf23e3ae29283ad96959267938d0efd20a2e25353b70065bfec/numpy-2.3.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65611ecbb00ac9846efe04db15cbe6186f562f6bb7e5e05f077e53a599225d16", size = 16059531, upload-time = "2025-10-15T16:15:59.412Z" }, - { url = "https://files.pythonhosted.org/packages/b0/e7/b106253c7c0d5dc352b9c8fab91afd76a93950998167fa3e5afe4ef3a18f/numpy-2.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dabc42f9c6577bcc13001b8810d300fe814b4cfbe8a92c873f269484594f9786", size = 18578983, upload-time = "2025-10-15T16:16:01.804Z" }, - { url = "https://files.pythonhosted.org/packages/73/e3/04ecc41e71462276ee867ccbef26a4448638eadecf1bc56772c9ed6d0255/numpy-2.3.4-cp312-cp312-win32.whl", hash = "sha256:a49d797192a8d950ca59ee2d0337a4d804f713bb5c3c50e8db26d49666e351dc", size = 6291380, upload-time = "2025-10-15T16:16:03.938Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a8/566578b10d8d0e9955b1b6cd5db4e9d4592dd0026a941ff7994cedda030a/numpy-2.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:985f1e46358f06c2a09921e8921e2c98168ed4ae12ccd6e5e87a4f1857923f32", size = 12787999, upload-time = "2025-10-15T16:16:05.801Z" }, - { url = 
"https://files.pythonhosted.org/packages/58/22/9c903a957d0a8071b607f5b1bff0761d6e608b9a965945411f867d515db1/numpy-2.3.4-cp312-cp312-win_arm64.whl", hash = "sha256:4635239814149e06e2cb9db3dd584b2fa64316c96f10656983b8026a82e6e4db", size = 10197412, upload-time = "2025-10-15T16:16:07.854Z" }, - { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, - { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, - { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, - { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, - { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, - { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, - { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, - { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, - { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, - { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, - { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, - { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, - { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, upload-time = "2025-10-15T16:16:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/72/71/ae6170143c115732470ae3a2d01512870dd16e0953f8a6dc89525696069b/numpy-2.3.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:81c3e6d8c97295a7360d367f9f8553973651b76907988bb6066376bc2252f24e", size = 20955580, upload-time = "2025-10-15T16:17:02.509Z" }, - { url = "https://files.pythonhosted.org/packages/af/39/4be9222ffd6ca8a30eda033d5f753276a9c3426c397bb137d8e19dedd200/numpy-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash 
= "sha256:7c26b0b2bf58009ed1f38a641f3db4be8d960a417ca96d14e5b06df1506d41ff", size = 14188056, upload-time = "2025-10-15T16:17:04.873Z" }, - { url = "https://files.pythonhosted.org/packages/6c/3d/d85f6700d0a4aa4f9491030e1021c2b2b7421b2b38d01acd16734a2bfdc7/numpy-2.3.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:62b2198c438058a20b6704351b35a1d7db881812d8512d67a69c9de1f18ca05f", size = 5116555, upload-time = "2025-10-15T16:17:07.499Z" }, - { url = "https://files.pythonhosted.org/packages/bf/04/82c1467d86f47eee8a19a464c92f90a9bb68ccf14a54c5224d7031241ffb/numpy-2.3.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:9d729d60f8d53a7361707f4b68a9663c968882dd4f09e0d58c044c8bf5faee7b", size = 6643581, upload-time = "2025-10-15T16:17:09.774Z" }, - { url = "https://files.pythonhosted.org/packages/0c/d3/c79841741b837e293f48bd7db89d0ac7a4f2503b382b78a790ef1dc778a5/numpy-2.3.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd0c630cf256b0a7fd9d0a11c9413b42fef5101219ce6ed5a09624f5a65392c7", size = 14299186, upload-time = "2025-10-15T16:17:11.937Z" }, - { url = "https://files.pythonhosted.org/packages/e8/7e/4a14a769741fbf237eec5a12a2cbc7a4c4e061852b6533bcb9e9a796c908/numpy-2.3.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5e081bc082825f8b139f9e9fe42942cb4054524598aaeb177ff476cc76d09d2", size = 16638601, upload-time = "2025-10-15T16:17:14.391Z" }, - { url = "https://files.pythonhosted.org/packages/93/87/1c1de269f002ff0a41173fe01dcc925f4ecff59264cd8f96cf3b60d12c9b/numpy-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:15fb27364ed84114438fff8aaf998c9e19adbeba08c0b75409f8c452a8692c52", size = 16074219, upload-time = "2025-10-15T16:17:17.058Z" }, - { url = "https://files.pythonhosted.org/packages/cd/28/18f72ee77408e40a76d691001ae599e712ca2a47ddd2c4f695b16c65f077/numpy-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:85d9fb2d8cd998c84d13a79a09cc0c1091648e848e4e6249b0ccd7f6b487fa26", size = 
18576702, upload-time = "2025-10-15T16:17:19.379Z" }, - { url = "https://files.pythonhosted.org/packages/c3/76/95650169b465ececa8cf4b2e8f6df255d4bf662775e797ade2025cc51ae6/numpy-2.3.4-cp314-cp314-win32.whl", hash = "sha256:e73d63fd04e3a9d6bc187f5455d81abfad05660b212c8804bf3b407e984cd2bc", size = 6337136, upload-time = "2025-10-15T16:17:22.886Z" }, - { url = "https://files.pythonhosted.org/packages/dc/89/a231a5c43ede5d6f77ba4a91e915a87dea4aeea76560ba4d2bf185c683f0/numpy-2.3.4-cp314-cp314-win_amd64.whl", hash = "sha256:3da3491cee49cf16157e70f607c03a217ea6647b1cea4819c4f48e53d49139b9", size = 12920542, upload-time = "2025-10-15T16:17:24.783Z" }, - { url = "https://files.pythonhosted.org/packages/0d/0c/ae9434a888f717c5ed2ff2393b3f344f0ff6f1c793519fa0c540461dc530/numpy-2.3.4-cp314-cp314-win_arm64.whl", hash = "sha256:6d9cd732068e8288dbe2717177320723ccec4fb064123f0caf9bbd90ab5be868", size = 10480213, upload-time = "2025-10-15T16:17:26.935Z" }, - { url = "https://files.pythonhosted.org/packages/83/4b/c4a5f0841f92536f6b9592694a5b5f68c9ab37b775ff342649eadf9055d3/numpy-2.3.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:22758999b256b595cf0b1d102b133bb61866ba5ceecf15f759623b64c020c9ec", size = 21052280, upload-time = "2025-10-15T16:17:29.638Z" }, - { url = "https://files.pythonhosted.org/packages/3e/80/90308845fc93b984d2cc96d83e2324ce8ad1fd6efea81b324cba4b673854/numpy-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9cb177bc55b010b19798dc5497d540dea67fd13a8d9e882b2dae71de0cf09eb3", size = 14302930, upload-time = "2025-10-15T16:17:32.384Z" }, - { url = "https://files.pythonhosted.org/packages/3d/4e/07439f22f2a3b247cec4d63a713faae55e1141a36e77fb212881f7cda3fb/numpy-2.3.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0f2bcc76f1e05e5ab58893407c63d90b2029908fa41f9f1cc51eecce936c3365", size = 5231504, upload-time = "2025-10-15T16:17:34.515Z" }, - { url = 
"https://files.pythonhosted.org/packages/ab/de/1e11f2547e2fe3d00482b19721855348b94ada8359aef5d40dd57bfae9df/numpy-2.3.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dc20bde86802df2ed8397a08d793da0ad7a5fd4ea3ac85d757bf5dd4ad7c252", size = 6739405, upload-time = "2025-10-15T16:17:36.128Z" }, - { url = "https://files.pythonhosted.org/packages/3b/40/8cd57393a26cebe2e923005db5134a946c62fa56a1087dc7c478f3e30837/numpy-2.3.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e199c087e2aa71c8f9ce1cb7a8e10677dc12457e7cc1be4798632da37c3e86e", size = 14354866, upload-time = "2025-10-15T16:17:38.884Z" }, - { url = "https://files.pythonhosted.org/packages/93/39/5b3510f023f96874ee6fea2e40dfa99313a00bf3ab779f3c92978f34aace/numpy-2.3.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85597b2d25ddf655495e2363fe044b0ae999b75bc4d630dc0d886484b03a5eb0", size = 16703296, upload-time = "2025-10-15T16:17:41.564Z" }, - { url = "https://files.pythonhosted.org/packages/41/0d/19bb163617c8045209c1996c4e427bccbc4bbff1e2c711f39203c8ddbb4a/numpy-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:04a69abe45b49c5955923cf2c407843d1c85013b424ae8a560bba16c92fe44a0", size = 16136046, upload-time = "2025-10-15T16:17:43.901Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c1/6dba12fdf68b02a21ac411c9df19afa66bed2540f467150ca64d246b463d/numpy-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e1708fac43ef8b419c975926ce1eaf793b0c13b7356cfab6ab0dc34c0a02ac0f", size = 18652691, upload-time = "2025-10-15T16:17:46.247Z" }, - { url = "https://files.pythonhosted.org/packages/f8/73/f85056701dbbbb910c51d846c58d29fd46b30eecd2b6ba760fc8b8a1641b/numpy-2.3.4-cp314-cp314t-win32.whl", hash = "sha256:863e3b5f4d9915aaf1b8ec79ae560ad21f0b8d5e3adc31e73126491bb86dee1d", size = 6485782, upload-time = "2025-10-15T16:17:48.872Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/90/28fa6f9865181cb817c2471ee65678afa8a7e2a1fb16141473d5fa6bacc3/numpy-2.3.4-cp314-cp314t-win_amd64.whl", hash = "sha256:962064de37b9aef801d33bc579690f8bfe6c5e70e29b61783f60bcba838a14d6", size = 13113301, upload-time = "2025-10-15T16:17:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, - { url = "https://files.pythonhosted.org/packages/b1/b6/64898f51a86ec88ca1257a59c1d7fd077b60082a119affefcdf1dd0df8ca/numpy-2.3.4-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6e274603039f924c0fe5cb73438fa9246699c78a6df1bd3decef9ae592ae1c05", size = 21131552, upload-time = "2025-10-15T16:17:55.845Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4c/f135dc6ebe2b6a3c77f4e4838fa63d350f85c99462012306ada1bd4bc460/numpy-2.3.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d149aee5c72176d9ddbc6803aef9c0f6d2ceeea7626574fc68518da5476fa346", size = 14377796, upload-time = "2025-10-15T16:17:58.308Z" }, - { url = "https://files.pythonhosted.org/packages/d0/a4/f33f9c23fcc13dd8412fc8614559b5b797e0aba9d8e01dfa8bae10c84004/numpy-2.3.4-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:6d34ed9db9e6395bb6cd33286035f73a59b058169733a9db9f85e650b88df37e", size = 5306904, upload-time = "2025-10-15T16:18:00.596Z" }, - { url = "https://files.pythonhosted.org/packages/28/af/c44097f25f834360f9fb960fa082863e0bad14a42f36527b2a121abdec56/numpy-2.3.4-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:fdebe771ca06bb8d6abce84e51dca9f7921fe6ad34a0c914541b063e9a68928b", size = 6819682, upload-time = "2025-10-15T16:18:02.32Z" }, - { url = 
"https://files.pythonhosted.org/packages/c5/8c/cd283b54c3c2b77e188f63e23039844f56b23bba1712318288c13fe86baf/numpy-2.3.4-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e92defe6c08211eb77902253b14fe5b480ebc5112bc741fd5e9cd0608f847", size = 14422300, upload-time = "2025-10-15T16:18:04.271Z" }, - { url = "https://files.pythonhosted.org/packages/b0/f0/8404db5098d92446b3e3695cf41c6f0ecb703d701cb0b7566ee2177f2eee/numpy-2.3.4-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13b9062e4f5c7ee5c7e5be96f29ba71bc5a37fed3d1d77c37390ae00724d296d", size = 16760806, upload-time = "2025-10-15T16:18:06.668Z" }, - { url = "https://files.pythonhosted.org/packages/95/8e/2844c3959ce9a63acc7c8e50881133d86666f0420bcde695e115ced0920f/numpy-2.3.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:81b3a59793523e552c4a96109dde028aa4448ae06ccac5a76ff6532a85558a7f", size = 12973130, upload-time = "2025-10-15T16:18:09.397Z" }, -] - -[[package]] -name = "packaging" -version = "24.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" }, -] - -[[package]] -name = "pandas" -version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.3.4", source = { registry 
= "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827, upload-time = "2024-09-20T13:08:42.347Z" }, - { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897, upload-time = "2024-09-20T13:08:45.807Z" }, - { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908, upload-time = "2024-09-20T18:37:13.513Z" }, - { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210, upload-time = "2024-09-20T13:08:48.325Z" }, - { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", 
size = 16754292, upload-time = "2024-09-20T19:01:54.443Z" }, - { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379, upload-time = "2024-09-20T13:08:50.882Z" }, - { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471, upload-time = "2024-09-20T13:08:53.332Z" }, - { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload-time = "2024-09-20T13:08:56.254Z" }, - { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload-time = "2024-09-20T13:08:58.645Z" }, - { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload-time = "2024-09-20T19:01:57.571Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload-time = "2024-09-20T13:09:01.501Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload-time = "2024-09-20T19:02:00.678Z" }, - { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload-time = "2024-09-20T13:09:04.105Z" }, - { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload-time = "2024-09-20T13:09:06.917Z" }, - { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, - { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, - { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, - { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, - { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, - { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, - { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, - { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, - { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, - { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, - { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, - { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, - { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, - { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, - { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, - { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, - { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, -] - -[[package]] -name = "parso" -version = "0.8.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609, upload-time = "2024-04-05T09:43:55.897Z" } 
-wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650, upload-time = "2024-04-05T09:43:53.299Z" }, -] - -[[package]] -name = "pexpect" -version = "4.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ptyprocess" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, -] - -[[package]] -name = "pickleshare" -version = "0.7.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/b6/df3c1c9b616e9c0edbc4fbab6ddd09df9535849c64ba51fcb6531c32d4d8/pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", size = 6161, upload-time = "2018-09-25T19:17:37.249Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/41/220f49aaea88bc6fa6cba8d05ecf24676326156c23b991e80b3f2fc24c77/pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56", size = 6877, upload-time = "2018-09-25T19:17:35.817Z" }, -] - -[[package]] -name = "platformdirs" -version = "4.3.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, -] - -[[package]] -name = "pluggy" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955, upload-time = "2024-04-20T21:34:42.531Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556, upload-time = "2024-04-20T21:34:40.434Z" }, -] - -[[package]] -name = "pre-commit" -version = "4.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cfgv" }, - { name = "identify" }, - { name = "nodeenv" }, - { name = "pyyaml" }, - { name = "virtualenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792, upload-time = "2025-08-09T18:56:14.651Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = 
"sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" }, -] - -[[package]] -name = "prompt-toolkit" -version = "3.0.48" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2d/4f/feb5e137aff82f7c7f3248267b97451da3644f6cdc218edfe549fb354127/prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90", size = 424684, upload-time = "2024-09-25T10:20:57.609Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/6a/fd08d94654f7e67c52ca30523a178b3f8ccc4237fce4be90d39c938a831a/prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e", size = 386595, upload-time = "2024-09-25T10:20:53.932Z" }, -] - -[[package]] -name = "ptyprocess" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, -] - -[[package]] -name = "pure-eval" -version = "0.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = 
"2024-07-21T12:58:21.801Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, -] - -[[package]] -name = "pyarrow" -version = "22.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" }, - { url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" }, - { url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" }, - { url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" }, 
- { url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" }, - { url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" }, - { url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" }, - { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" }, - { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" }, - { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" }, - { url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" }, - { url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" }, - { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" }, - { url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" }, - { url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" }, - { url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" }, - { url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" }, - { url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" }, - { url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" }, - { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" }, - { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" }, - { url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" }, - { url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" }, - { url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" }, - { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" }, - { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" }, - { url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" }, - { url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" }, - { url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" }, - { url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" }, - { url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" }, - { url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" }, - { url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" }, - { url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" }, - { url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" }, - { url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" }, - { url = 
"https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" }, - { url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" }, - { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, -] - -[[package]] -name = "pycparser" -version = "2.22" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736, upload-time = "2024-03-30T13:22:22.564Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, -] - -[[package]] -name = "pydata-sphinx-theme" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "beautifulsoup4" }, - { name = "docutils" }, - { name = "sphinx" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746, upload-time = "2022-01-15T19:25:25.712Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/26/0694318d46c7d90ab602ae27b24431e939f1600f9a4c69d1e727ec57289f/pydata_sphinx_theme-0.8.0-py3-none-any.whl", hash = "sha256:fbcbb833a07d3ad8dd997dd40dc94da18d98b41c68123ab0182b58fe92271204", size = 3284997, upload-time = "2022-01-15T19:25:23.807Z" }, -] - -[[package]] -name = "pygithub" -version = "2.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "deprecated" }, - { name = "pyjwt", extra = ["crypto"] }, - { name = "pynacl" }, - { name = "requests" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804, upload-time = "2024-11-06T20:50:07.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/37/05/bfbdbbc5d8aafd8dae9b3b6877edca561fccd8528ef5edc4e7b6d23721b5/PyGithub-2.5.0-py3-none-any.whl", hash = "sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2", size = 375935, upload-time = "2024-11-06T20:50:04.931Z" }, -] - -[[package]] -name = "pygments" -version = "2.19.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, -] - -[[package]] -name = "pyjwt" -version = "2.10.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, -] - -[package.optional-dependencies] -crypto = [ - { name = "cryptography" }, -] - -[[package]] -name = "pynacl" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854, upload-time = "2022-01-07T22:05:41.134Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920, upload-time = "2022-01-07T22:05:49.156Z" }, - { url = 
"https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722, upload-time = "2022-01-07T22:05:50.989Z" }, - { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087, upload-time = "2022-01-07T22:05:52.539Z" }, - { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678, upload-time = "2022-01-07T22:05:54.251Z" }, - { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660, upload-time = "2022-01-07T22:05:56.056Z" }, - { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824, upload-time = "2022-01-07T22:05:57.434Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912, upload-time = "2022-01-07T22:05:58.665Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624, upload-time = "2022-01-07T22:06:00.085Z" }, - { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141, upload-time = "2022-01-07T22:06:01.861Z" }, -] - -[[package]] -name = "pytest" -version = "8.3.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919, upload-time = "2024-12-01T12:54:25.98Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083, upload-time = "2024-12-01T12:54:19.735Z" }, -] - -[[package]] -name = "pytest-asyncio" -version = "0.25.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytest" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239, upload-time = 
"2025-01-28T18:37:58.729Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467, upload-time = "2025-01-28T18:37:56.798Z" }, -] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, -] - -[[package]] -name = "pytz" -version = "2024.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3a/31/3c70bf7603cc2dca0f19bdc53b4537a797747a58875b552c8c413d963a3f/pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a", size = 319692, upload-time = "2024-09-11T02:24:47.91Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002, upload-time = "2024-09-11T02:24:45.8Z" }, -] - -[[package]] -name = "pyyaml" -version = "6.0.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227, upload-time = "2025-09-25T21:31:46.04Z" }, - { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019, upload-time = "2025-09-25T21:31:47.706Z" }, - { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646, upload-time = "2025-09-25T21:31:49.21Z" }, - { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793, upload-time = "2025-09-25T21:31:50.735Z" }, - { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293, upload-time = "2025-09-25T21:31:51.828Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872, upload-time = "2025-09-25T21:31:53.282Z" }, - { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828, upload-time = "2025-09-25T21:31:54.807Z" }, - { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415, upload-time = "2025-09-25T21:31:55.885Z" }, - { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561, upload-time = "2025-09-25T21:31:57.406Z" }, - { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, - { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, - { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" }, - { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" }, - { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" }, - { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" }, - { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, - { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, - { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, - { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, - { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, - { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, - { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, - { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, - { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, - { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, - { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, - { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, - { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, - { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, - { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, - { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, - { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = 
"sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, - { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, - { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, - { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, - { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, - { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, - { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, - { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, - { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, - { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, - { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = 
"2025-09-25T21:32:44.377Z" }, - { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, - { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, - { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, - { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, - { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, - { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time 
= "2025-09-25T21:32:54.537Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, - { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, -] - -[[package]] -name = "requests" -version = "2.32.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "charset-normalizer" }, - { name = "idna" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload-time = "2024-05-29T15:37:49.536Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload-time = "2024-05-29T15:37:47.027Z" }, -] - -[[package]] -name = "ruff" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/67/3e/e89f736f01aa9517a97e2e7e0ce8d34a4d8207087b3cfdec95133fee13b5/ruff-0.9.1.tar.gz", hash = "sha256:fd2b25ecaf907d6458fa842675382c8597b3c746a2dde6717fe3415425df0c17", size = 3498844, upload-time = "2025-01-10T18:57:53.896Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/dc/05/c3a2e0feb3d5d394cdfd552de01df9d3ec8a3a3771bbff247fab7e668653/ruff-0.9.1-py3-none-linux_armv6l.whl", hash = "sha256:84330dda7abcc270e6055551aca93fdde1b0685fc4fd358f26410f9349cf1743", size = 10645241, upload-time = "2025-01-10T18:56:45.897Z" }, - { url = "https://files.pythonhosted.org/packages/dd/da/59f0a40e5f88ee5c054ad175caaa2319fc96571e1d29ab4730728f2aad4f/ruff-0.9.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3cae39ba5d137054b0e5b472aee3b78a7c884e61591b100aeb544bcd1fc38d4f", size = 10391066, upload-time = "2025-01-10T18:56:52.224Z" }, - { url = "https://files.pythonhosted.org/packages/b7/fe/85e1c1acf0ba04a3f2d54ae61073da030f7a5dc386194f96f3c6ca444a78/ruff-0.9.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:50c647ff96f4ba288db0ad87048257753733763b409b2faf2ea78b45c8bb7fcb", size = 10012308, upload-time = "2025-01-10T18:56:55.426Z" }, - { url = "https://files.pythonhosted.org/packages/6f/9b/780aa5d4bdca8dcea4309264b8faa304bac30e1ce0bcc910422bfcadd203/ruff-0.9.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0c8b149e9c7353cace7d698e1656ffcf1e36e50f8ea3b5d5f7f87ff9986a7ca", size = 10881960, upload-time = "2025-01-10T18:56:59.539Z" }, - { url = "https://files.pythonhosted.org/packages/12/f4/dac4361afbfe520afa7186439e8094e4884ae3b15c8fc75fb2e759c1f267/ruff-0.9.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:beb3298604540c884d8b282fe7625651378e1986c25df51dec5b2f60cafc31ce", size = 10414803, upload-time = "2025-01-10T18:57:04.919Z" }, - { url = "https://files.pythonhosted.org/packages/f0/a2/057a3cb7999513cb78d6cb33a7d1cc6401c82d7332583786e4dad9e38e44/ruff-0.9.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39d0174ccc45c439093971cc06ed3ac4dc545f5e8bdacf9f067adf879544d969", size = 11464929, upload-time = "2025-01-10T18:57:08.146Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/c6/1ccfcc209bee465ced4874dcfeaadc88aafcc1ea9c9f31ef66f063c187f0/ruff-0.9.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:69572926c0f0c9912288915214ca9b2809525ea263603370b9e00bed2ba56dbd", size = 12170717, upload-time = "2025-01-10T18:57:12.564Z" }, - { url = "https://files.pythonhosted.org/packages/84/97/4a524027518525c7cf6931e9fd3b2382be5e4b75b2b61bec02681a7685a5/ruff-0.9.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:937267afce0c9170d6d29f01fcd1f4378172dec6760a9f4dface48cdabf9610a", size = 11708921, upload-time = "2025-01-10T18:57:17.216Z" }, - { url = "https://files.pythonhosted.org/packages/a6/a4/4e77cf6065c700d5593b25fca6cf725b1ab6d70674904f876254d0112ed0/ruff-0.9.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:186c2313de946f2c22bdf5954b8dd083e124bcfb685732cfb0beae0c47233d9b", size = 13058074, upload-time = "2025-01-10T18:57:20.57Z" }, - { url = "https://files.pythonhosted.org/packages/f9/d6/fcb78e0531e863d0a952c4c5600cc5cd317437f0e5f031cd2288b117bb37/ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f94942a3bb767675d9a051867c036655fe9f6c8a491539156a6f7e6b5f31831", size = 11281093, upload-time = "2025-01-10T18:57:25.526Z" }, - { url = "https://files.pythonhosted.org/packages/e4/3b/7235bbeff00c95dc2d073cfdbf2b871b5bbf476754c5d277815d286b4328/ruff-0.9.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:728d791b769cc28c05f12c280f99e8896932e9833fef1dd8756a6af2261fd1ab", size = 10882610, upload-time = "2025-01-10T18:57:28.855Z" }, - { url = "https://files.pythonhosted.org/packages/2a/66/5599d23257c61cf038137f82999ca8f9d0080d9d5134440a461bef85b461/ruff-0.9.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2f312c86fb40c5c02b44a29a750ee3b21002bd813b5233facdaf63a51d9a85e1", size = 10489273, upload-time = "2025-01-10T18:57:32.219Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/85/de4aa057e2532db0f9761e2c2c13834991e087787b93e4aeb5f1cb10d2df/ruff-0.9.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ae017c3a29bee341ba584f3823f805abbe5fe9cd97f87ed07ecbf533c4c88366", size = 11003314, upload-time = "2025-01-10T18:57:35.431Z" }, - { url = "https://files.pythonhosted.org/packages/00/42/afedcaa089116d81447347f76041ff46025849fedb0ed2b187d24cf70fca/ruff-0.9.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5dc40a378a0e21b4cfe2b8a0f1812a6572fc7b230ef12cd9fac9161aa91d807f", size = 11342982, upload-time = "2025-01-10T18:57:38.642Z" }, - { url = "https://files.pythonhosted.org/packages/39/c6/fe45f3eb27e3948b41a305d8b768e949bf6a39310e9df73f6c576d7f1d9f/ruff-0.9.1-py3-none-win32.whl", hash = "sha256:46ebf5cc106cf7e7378ca3c28ce4293b61b449cd121b98699be727d40b79ba72", size = 8819750, upload-time = "2025-01-10T18:57:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/38/8d/580db77c3b9d5c3d9479e55b0b832d279c30c8f00ab0190d4cd8fc67831c/ruff-0.9.1-py3-none-win_amd64.whl", hash = "sha256:342a824b46ddbcdddd3abfbb332fa7fcaac5488bf18073e841236aadf4ad5c19", size = 9701331, upload-time = "2025-01-10T18:57:46.334Z" }, - { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708, upload-time = "2025-01-10T18:57:51.308Z" }, -] - -[[package]] -name = "setuptools" -version = "75.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222, upload-time = "2025-01-08T18:28:23.98Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782, upload-time = "2025-01-08T18:28:20.912Z" }, -] - -[[package]] -name = "six" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, -] - -[[package]] -name = "snowballstemmer" -version = "2.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/7b/af302bebf22c749c56c9c3e8ae13190b5b5db37a33d9068652e8f73b7089/snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", size = 86699, upload-time = "2021-11-16T18:38:38.009Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a", size = 93002, upload-time = "2021-11-16T18:38:34.792Z" }, -] - -[[package]] -name = "soupsieve" -version = "2.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = 
"sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569, upload-time = "2024-08-13T13:39:12.166Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186, upload-time = "2024-08-13T13:39:10.986Z" }, -] - -[[package]] -name = "sphinx" -version = "8.1.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alabaster" }, - { name = "babel" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "docutils" }, - { name = "imagesize" }, - { name = "jinja2" }, - { name = "packaging" }, - { name = "pygments" }, - { name = "requests" }, - { name = "snowballstemmer" }, - { name = "sphinxcontrib-applehelp" }, - { name = "sphinxcontrib-devhelp" }, - { name = "sphinxcontrib-htmlhelp" }, - { name = "sphinxcontrib-jsmath" }, - { name = "sphinxcontrib-qthelp" }, - { name = "sphinxcontrib-serializinghtml" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611, upload-time = "2024-10-13T20:27:13.93Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125, upload-time = "2024-10-13T20:27:10.448Z" }, -] - -[[package]] -name = "sphinx-autoapi" -version = "3.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "astroid" }, - { name = "jinja2" }, - { name = "pyyaml" }, - { name = "sphinx" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292, upload-time = "2024-11-30T01:09:40.956Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095, upload-time = "2024-11-30T01:09:17.272Z" }, -] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, -] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = 
"sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, -] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, -] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, -] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time 
= "2024-07-29T01:09:56.435Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, -] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, -] - -[[package]] -name = "stack-data" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "asttokens" }, - { name = "executing" }, - { name = "pure-eval" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, -] - -[[package]] -name = "toml" -version = "0.10.2" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, -] - -[[package]] -name = "tomli" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175, upload-time = "2024-11-27T22:38:36.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077, upload-time = "2024-11-27T22:37:54.956Z" }, - { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429, upload-time = "2024-11-27T22:37:56.698Z" }, - { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067, upload-time = "2024-11-27T22:37:57.63Z" }, - { url = 
"https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030, upload-time = "2024-11-27T22:37:59.344Z" }, - { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898, upload-time = "2024-11-27T22:38:00.429Z" }, - { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894, upload-time = "2024-11-27T22:38:02.094Z" }, - { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319, upload-time = "2024-11-27T22:38:03.206Z" }, - { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273, upload-time = "2024-11-27T22:38:04.217Z" }, - { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310, upload-time = "2024-11-27T22:38:05.908Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309, upload-time = "2024-11-27T22:38:06.812Z" }, - { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762, upload-time = "2024-11-27T22:38:07.731Z" }, - { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453, upload-time = "2024-11-27T22:38:09.384Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486, upload-time = "2024-11-27T22:38:10.329Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349, upload-time = "2024-11-27T22:38:11.443Z" }, - { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159, upload-time = "2024-11-27T22:38:13.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243, upload-time = "2024-11-27T22:38:14.766Z" }, - { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645, upload-time = "2024-11-27T22:38:15.843Z" }, - { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584, upload-time = "2024-11-27T22:38:17.645Z" }, - { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875, upload-time = "2024-11-27T22:38:19.159Z" }, - { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418, upload-time = "2024-11-27T22:38:20.064Z" }, - { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708, upload-time = "2024-11-27T22:38:21.659Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582, upload-time = "2024-11-27T22:38:22.693Z" }, - { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543, upload-time = "2024-11-27T22:38:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691, upload-time = "2024-11-27T22:38:26.081Z" }, - { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170, upload-time = "2024-11-27T22:38:27.921Z" }, - { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530, upload-time = "2024-11-27T22:38:29.591Z" }, - { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666, upload-time = "2024-11-27T22:38:30.639Z" }, - { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954, upload-time = "2024-11-27T22:38:31.702Z" }, - { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724, upload-time = "2024-11-27T22:38:32.837Z" }, - { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383, upload-time = "2024-11-27T22:38:34.455Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, -] - -[[package]] -name = "traitlets" -version = "5.14.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", 
hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321, upload-time = "2024-06-07T18:52:15.995Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438, upload-time = "2024-06-07T18:52:13.582Z" }, -] - -[[package]] -name = "tzdata" -version = "2024.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282, upload-time = "2024-09-23T18:56:46.89Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586, upload-time = "2024-09-23T18:56:45.478Z" }, -] - -[[package]] -name = "urllib3" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268, upload-time = "2024-12-22T07:47:30.032Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369, upload-time = "2024-12-22T07:47:28.074Z" }, -] - -[[package]] -name = "virtualenv" -version = "20.31.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = 
"distlib" }, - { name = "filelock" }, - { name = "platformdirs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, -] - -[[package]] -name = "wcwidth" -version = "0.2.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, -] - -[[package]] -name = "wrapt" -version = "1.17.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307, upload-time = "2025-01-14T10:33:13.616Z" }, - { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486, upload-time = "2025-01-14T10:33:15.947Z" }, - { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777, upload-time = "2025-01-14T10:33:17.462Z" }, - { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314, upload-time = "2025-01-14T10:33:21.282Z" }, - { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947, upload-time = "2025-01-14T10:33:24.414Z" }, - { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778, upload-time = "2025-01-14T10:33:26.152Z" }, - { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716, upload-time = "2025-01-14T10:33:27.372Z" }, - { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548, upload-time = "2025-01-14T10:33:28.52Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334, upload-time = "2025-01-14T10:33:29.643Z" }, - { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427, upload-time = "2025-01-14T10:33:30.832Z" }, - { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774, upload-time = "2025-01-14T10:33:32.897Z" }, - { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308, upload-time = "2025-01-14T10:33:33.992Z" }, - { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488, upload-time = "2025-01-14T10:33:35.264Z" }, - { url = 
"https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776, upload-time = "2025-01-14T10:33:38.28Z" }, - { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776, upload-time = "2025-01-14T10:33:40.678Z" }, - { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420, upload-time = "2025-01-14T10:33:41.868Z" }, - { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199, upload-time = "2025-01-14T10:33:43.598Z" }, - { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307, upload-time = "2025-01-14T10:33:48.499Z" }, - { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025, upload-time = "2025-01-14T10:33:51.191Z" }, - { url = 
"https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879, upload-time = "2025-01-14T10:33:52.328Z" }, - { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419, upload-time = "2025-01-14T10:33:53.551Z" }, - { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773, upload-time = "2025-01-14T10:33:56.323Z" }, - { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, - { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, - { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721, upload-time = "2025-01-14T10:34:07.163Z" }, - { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899, upload-time = "2025-01-14T10:34:09.82Z" }, - { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222, upload-time = "2025-01-14T10:34:11.258Z" }, - { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707, upload-time = "2025-01-14T10:34:12.49Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685, upload-time = "2025-01-14T10:34:15.043Z" }, - { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567, upload-time = "2025-01-14T10:34:16.563Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672, upload-time = "2025-01-14T10:34:17.727Z" }, - { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865, upload-time = "2025-01-14T10:34:19.577Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800, upload-time = "2025-01-14T10:34:21.571Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824, upload-time = "2025-01-14T10:34:22.999Z" }, - { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920, upload-time = "2025-01-14T10:34:25.386Z" }, - { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690, upload-time = "2025-01-14T10:34:28.058Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861, upload-time = "2025-01-14T10:34:29.167Z" }, - { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174, upload-time = "2025-01-14T10:34:31.702Z" }, - { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721, upload-time = "2025-01-14T10:34:32.91Z" }, - { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763, upload-time = "2025-01-14T10:34:34.903Z" }, - { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585, upload-time = "2025-01-14T10:34:36.13Z" }, - { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676, upload-time = "2025-01-14T10:34:37.962Z" }, - { url = 
"https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871, upload-time = "2025-01-14T10:34:39.13Z" }, - { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312, upload-time = "2025-01-14T10:34:40.604Z" }, - { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062, upload-time = "2025-01-14T10:34:45.011Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155, upload-time = "2025-01-14T10:34:47.25Z" }, - { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471, upload-time = "2025-01-14T10:34:50.934Z" }, - { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208, upload-time = "2025-01-14T10:34:52.297Z" }, - { url = 
"https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339, upload-time = "2025-01-14T10:34:53.489Z" }, - { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232, upload-time = "2025-01-14T10:34:55.327Z" }, - { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476, upload-time = "2025-01-14T10:34:58.055Z" }, - { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377, upload-time = "2025-01-14T10:34:59.3Z" }, - { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986, upload-time = "2025-01-14T10:35:00.498Z" }, - { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750, upload-time = "2025-01-14T10:35:03.378Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, -]