Skip to content

Commit 35432ee

Browse files
merged back from submodulev3, overrode Partitioner and Splitter changes
2 parents bdb4ee1 + bd1dd04 commit 35432ee

File tree

897 files changed

+23899
-14725
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

897 files changed

+23899
-14725
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
command: |
1212
source build_tools/shared.sh
1313
# Include pytest compatibility with mypy
14-
pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
14+
pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint
1515
- run:
1616
name: linting
1717
command: ./build_tools/linting.sh

.codecov.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,4 @@ ignore:
3030
- "sklearn/_build_utils"
3131
- "sklearn/__check_build"
3232
- "sklearn/_min_dependencies.py"
33-
- "**/setup.py"
3433
- "**/conftest.py"

.coveragerc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,3 @@ omit =
66
*/sklearn/externals/*
77
*/sklearn/_build_utils/*
88
*/benchmarks/*
9-
**/setup.py

.github/workflows/artifact-redirector.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
name: Run CircleCI artifacts redirector
1616
steps:
1717
- name: GitHub Action step
18-
uses: larsoner/circleci-artifacts-redirector-action@master
18+
uses: scientific-python/circleci-artifacts-redirector-action@v1
1919
with:
2020
repo-token: ${{ secrets.GITHUB_TOKEN }}
2121
api-token: ${{ secrets.CIRCLECI_TOKEN }}

.github/workflows/assign.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@ jobs:
1919
&& !github.event.issue.assignee
2020
steps:
2121
- run: |
22+
# Using REST API directly because assigning through gh has some severe limitations. For more details, see
23+
# https://github.com/scikit-learn/scikit-learn/issues/29395#issuecomment-2206776963
2224
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
23-
gh issue edit $ISSUE --add-assignee ${{ github.event.comment.user.login }}
25+
curl -H "Authorization: token $GH_TOKEN" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' \
26+
https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
2427
gh issue edit $ISSUE --remove-label "help wanted"
2528
env:
2629
GH_TOKEN: ${{ github.token }}

.github/workflows/check-changelog.yml

Lines changed: 12 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,65 +4,30 @@ name: Check Changelog
44
# To bypass this check, label the PR with "No Changelog Needed".
55
on:
66
pull_request:
7-
types: [opened, edited, labeled, unlabeled, synchronize]
7+
types: [opened, synchronize, labeled, unlabeled]
88

99
jobs:
1010
check:
1111
name: A reviewer will let you know if it is required or can be bypassed
1212
runs-on: ubuntu-latest
13-
if: ${{ contains(github.event.pull_request.labels.*.name, 'No Changelog Needed') == 0 }}
1413
steps:
15-
- name: Get PR number and milestone
16-
run: |
17-
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
18-
echo "TAGGED_MILESTONE=${{ github.event.pull_request.milestone.title }}" >> $GITHUB_ENV
1914
- uses: actions/checkout@v4
2015
with:
2116
fetch-depth: '0'
22-
- name: Check the changelog entry
17+
- name: Check if tests have changed
18+
id: tests_changed
2319
run: |
2420
set -xe
2521
changed_files=$(git diff --name-only origin/main)
2622
# Changelog should be updated only if tests have been modified
27-
if [[ ! "$changed_files" =~ tests ]]
28-
then
29-
exit 0
30-
fi
31-
all_changelogs=$(cat ./doc/whats_new/v*.rst)
32-
if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]]
23+
if [[ "$changed_files" =~ tests ]]
3324
then
34-
echo "Changelog has been updated."
35-
# If the pull request is milestoned check the correspondent changelog
36-
if exist -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst
37-
then
38-
expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
39-
if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]]
40-
then
41-
echo "Changelog and milestone correspond."
42-
else
43-
echo "Changelog and milestone do not correspond."
44-
echo "If you see this error make sure that the tagged milestone for the PR"
45-
echo "and the edited changelog filename properly match."
46-
exit 1
47-
fi
48-
fi
49-
else
50-
echo "A Changelog entry is missing."
51-
echo ""
52-
echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'"
53-
echo "to document your change assuming that the PR will be merged"
54-
echo "in time for the next release of scikit-learn."
55-
echo ""
56-
echo "Look at other entries in that file for inspiration and please"
57-
echo "reference this pull request using the ':pr:' directive and"
58-
echo "credit yourself (and other contributors if applicable) with"
59-
echo "the ':user:' directive."
60-
echo ""
61-
echo "If you see this error and there is already a changelog entry,"
62-
echo "check that the PR number is correct."
63-
echo ""
64-
echo "If you believe that this PR does not warrant a changelog"
65-
echo "entry, say so in a comment so that a maintainer will label"
66-
echo "the PR with 'No Changelog Needed' to bypass this check."
67-
exit 1
25+
echo "check_changelog=true" >> $GITHUB_OUTPUT
6826
fi
27+
28+
- name: Check changelog entry
29+
if: steps.tests_changed.outputs.check_changelog == 'true'
30+
uses: scientific-python/action-towncrier-changelog@v1
31+
env:
32+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
33+
BOT_USERNAME: changelog-bot

.github/workflows/cuda-ci.yml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
name: CUDA GPU
2+
3+
# Only run this workflow when a Pull Request is labeled with the
4+
# 'CUDA CI' label.
5+
on:
6+
pull_request:
7+
types:
8+
- labeled
9+
10+
jobs:
11+
build_wheel:
12+
if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
13+
runs-on: "ubuntu-latest"
14+
name: Build wheel for Pull Request
15+
steps:
16+
- uses: actions/checkout@v4
17+
18+
- name: Build wheels
19+
uses: pypa/cibuildwheel@v2.21.3
20+
env:
21+
CIBW_BUILD: cp312-manylinux_x86_64
22+
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
23+
CIBW_BUILD_VERBOSITY: 1
24+
CIBW_ARCHS: x86_64
25+
26+
- uses: actions/upload-artifact@v4
27+
with:
28+
name: cibw-wheels
29+
path: ./wheelhouse/*.whl
30+
31+
tests:
32+
if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
33+
needs: [build_wheel]
34+
runs-on:
35+
group: cuda-gpu-runner-group
36+
# Set this high enough so that the tests can comfortably run. We set a
37+
# timeout to make abusing this workflow less attractive.
38+
timeout-minutes: 20
39+
name: Run Array API unit tests
40+
steps:
41+
- uses: actions/download-artifact@v4
42+
with:
43+
pattern: cibw-wheels
44+
path: ~/dist
45+
46+
- uses: actions/setup-python@v5
47+
with:
48+
# XXX: The 3.12.4 release of Python on GitHub Actions is corrupted:
49+
# https://github.com/actions/setup-python/issues/886
50+
python-version: '3.12.3'
51+
- name: Checkout main repository
52+
uses: actions/checkout@v4
53+
- name: Cache conda environment
54+
id: cache-conda
55+
uses: actions/cache@v4
56+
with:
57+
path: ~/conda
58+
key: ${{ runner.os }}-build-${{ hashFiles('build_tools/github/create_gpu_environment.sh') }}-${{ hashFiles('build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock') }}
59+
- name: Install miniforge
60+
if: ${{ steps.cache-conda.outputs.cache-hit != 'true' }}
61+
run: bash build_tools/github/create_gpu_environment.sh
62+
- name: Install scikit-learn
63+
run: |
64+
source "${HOME}/conda/etc/profile.d/conda.sh"
65+
conda activate sklearn
66+
pip install ~/dist/cibw-wheels/$(ls ~/dist/cibw-wheels)
67+
68+
- name: Run array API tests
69+
run: |
70+
source "${HOME}/conda/etc/profile.d/conda.sh"
71+
conda activate sklearn
72+
python -c "import sklearn; sklearn.show_versions()"
73+
74+
SCIPY_ARRAY_API=1 pytest --pyargs sklearn -k 'array_api'
75+
# Run in /home/runner to not load sklearn from the checkout repo
76+
working-directory: /home/runner

.github/workflows/cuda-gpu-ci.yml

Lines changed: 0 additions & 46 deletions
This file was deleted.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Remove "CUDA CI" Label
2+
3+
# This workflow removes the "CUDA CI" label that triggers the actual
4+
# CUDA CI. It is separate so that we can use the `pull_request_target`
5+
# trigger which has an API token with write access.
6+
on:
7+
pull_request_target:
8+
types:
9+
- labeled
10+
11+
# In order to remove the "CUDA CI" label we need to have write permissions for PRs
12+
permissions:
13+
pull-requests: write
14+
15+
jobs:
16+
label-remover:
17+
if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
18+
name: Remove "CUDA CI" Label
19+
runs-on: ubuntu-20.04
20+
steps:
21+
- uses: actions-ecosystem/action-remove-labels@v1
22+
with:
23+
labels: CUDA CI

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
run: |
3434
source build_tools/shared.sh
3535
# Include pytest compatibility with mypy
36-
pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint
36+
pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint
3737
# we save the versions of the linters to be used in the error message later.
3838
python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt
3939
python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt

.github/workflows/publish_pypi.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ jobs:
3939
run: |
4040
python build_tools/github/check_wheels.py
4141
- name: Publish package to TestPyPI
42-
uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
42+
uses: pypa/gh-action-pypi-publish@fb13cb306901256ace3dab689990e13a5550ffaa # v1.11.0
4343
with:
4444
repository-url: https://test.pypi.org/legacy/
4545
print-hash: true
4646
if: ${{ github.event.inputs.pypi_repo == 'testpypi' }}
4747
- name: Publish package to PyPI
48-
uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
48+
uses: pypa/gh-action-pypi-publish@fb13cb306901256ace3dab689990e13a5550ffaa # v1.11.0
4949
if: ${{ github.event.inputs.pypi_repo == 'pypi' }}
5050
with:
5151
print-hash: true
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Workflow to update lock files in a PR, triggered by specific PR comments
2+
name: Update lock files in PR
3+
on:
4+
issue_comment:
5+
types: [created]
6+
7+
permissions:
8+
contents: write
9+
statuses: write
10+
11+
jobs:
12+
update-lock-files:
13+
if: >-
14+
github.repository == 'scikit-learn/scikit-learn'
15+
&& github.event.issue.pull_request
16+
&& startsWith(github.event.comment.body, '@scikit-learn-bot update lock-files')
17+
runs-on: ubuntu-latest
18+
19+
steps:
20+
# There is no way to get the HEAD information directly from the issue_comment
21+
# event, so we use the GitHub CLI to get the PR head ref and repository
22+
- name: Get pull request HEAD information
23+
id: pr-head-info
24+
env:
25+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26+
run: |
27+
pr_info=$(gh pr view ${{ github.event.issue.number }} --repo ${{ github.repository }} --json headRefName,headRefOid,headRepository,headRepositoryOwner)
28+
pr_head_ref=$(echo "$pr_info" | jq -r '.headRefName')
29+
pr_head_sha=$(echo "$pr_info" | jq -r '.headRefOid')
30+
pr_head_repository=$(echo "$pr_info" | jq -r '.headRepositoryOwner.login + "/" + .headRepository.name')
31+
echo "pr_head_ref=$pr_head_ref" >> $GITHUB_OUTPUT
32+
echo "pr_head_sha=$pr_head_sha" >> $GITHUB_OUTPUT
33+
echo "pr_head_repository=$pr_head_repository" >> $GITHUB_OUTPUT
34+
35+
# Set the status of the latest commit in the PR to indicate that the update is in progress
36+
# https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
37+
- name: Set pending status
38+
env:
39+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40+
run: |
41+
gh api \
42+
--method POST \
43+
-H "Accept: application/vnd.github+json" \
44+
-H "X-GitHub-Api-Version: 2022-11-28" \
45+
/repos/${{ github.repository }}/statuses/${{ steps.pr-head-info.outputs.pr_head_sha }} \
46+
-f "state=pending" \
47+
-f "target_url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
48+
-f "description=Updating lock files..." \
49+
-f "context=update-lock-files-pr"
50+
51+
- name: Check out the PR branch
52+
uses: actions/checkout@v4
53+
with:
54+
ref: ${{ steps.pr-head-info.outputs.pr_head_ref }}
55+
repository: ${{ steps.pr-head-info.outputs.pr_head_repository }}
56+
57+
# We overwrite all the scripts we are going to use in this workflow with their
58+
# versions on main; since this workflow has write permissions, this prevents
58+
# malicious changes to these scripts in PRs from being executed
60+
- name: Download scripts from main
61+
run: |
62+
curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/shared.sh --retry 5 -o ./build_tools/shared.sh
63+
curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/update_environments_and_lock_files.py --retry 5 -o ./build_tools/update_environments_and_lock_files.py
64+
curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/on_pr_comment_update_environments_and_lock_files.py --retry 5 -o ./build_tools/on_pr_comment_update_environments_and_lock_files.py
65+
66+
- name: Update lock files
67+
env:
68+
COMMENT: ${{ github.event.comment.body }}
69+
# We download the lock files update scripts from main, since this workflow is
70+
# run from main itself
71+
run: |
72+
source build_tools/shared.sh
73+
source $CONDA/bin/activate
74+
conda install -n base conda conda-libmamba-solver -y
75+
conda config --set solver libmamba
76+
conda install -c conda-forge "$(get_dep conda-lock min)" -y
77+
78+
python build_tools/on_pr_comment_update_environments_and_lock_files.py
79+
80+
- name: Set completion status
81+
if: ${{ always() }}
82+
env:
83+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
84+
run: |
85+
gh api \
86+
--method POST \
87+
-H "Accept: application/vnd.github+json" \
88+
-H "X-GitHub-Api-Version: 2022-11-28" \
89+
/repos/${{ github.repository }}/statuses/${{ steps.pr-head-info.outputs.pr_head_sha }} \
90+
-f "state=${{ job.status == 'success' && 'success' || 'error' }}" \
91+
-f "target_url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
92+
-f "description=Lock files ${{ job.status == 'success' && 'updated' || 'failed to update' }}." \
93+
-f "context=update-lock-files-pr"

0 commit comments

Comments
 (0)