Skip to content

Commit 2882cac

Browse files
authored
Merge branch 'develop' into feature/dul-extensions
2 parents c782e5d + 452790c commit 2882cac

File tree

194 files changed

+13535
-5041
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

194 files changed

+13535
-5041
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.9.3.dev0
2+
current_version = 0.10.1.dev0
33
commit = False
44
tag = False
55
allow_dirty = False

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ notebooks/*.ipynb -linguist-detectable
22
*.png filter=lfs diff=lfs merge=lfs -text
33
*.svg filter=lfs diff=lfs merge=lfs -text
44
logo.svg -filter=lfs -diff=lfs -merge=lfs text
5+
*.pkl filter=lfs diff=lfs merge=lfs -text

.github/workflows/main.yaml

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,13 @@ jobs:
112112
group_number: ${{ matrix.group_number }}
113113
needs: [ code-quality ]
114114

115-
push-docs-and-release-testpypi:
116-
name: Publish documentation # and maybe release package to TestPyPI
115+
publish-docs:
116+
name: Publish documentation
117117
runs-on: ubuntu-22.04
118118
needs: [docs, group-tests, notebook-tests]
119-
if: ${{ github.ref == 'refs/heads/develop' }}
119+
if: ${{ github.ref == 'refs/heads/develop' && github.event_name != 'pull_request' }}
120120
concurrency:
121-
group: publish
121+
group: publish-docs
122122
steps:
123123
- uses: actions/checkout@v4
124124
with:
@@ -140,14 +140,37 @@ jobs:
140140
title: Development
141141
email: ${{ env.GITHUB_BOT_EMAIL }}
142142
username: ${{ env.GITHUB_BOT_USERNAME }}
143-
# Deactivated until we solve the issue with the credentials (Jan. 2025)
144-
# - name: Build and publish to TestPyPI
145-
# env:
146-
# TWINE_USERNAME: __token__
147-
# TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }}
148-
# run: |
149-
# set -x
150-
# export BUILD_NUMBER=$GITHUB_RUN_NUMBER
151-
# bump2version --no-tag --no-commit --verbose --serialize '{major}.{minor}.{patch}.{release}{$BUILD_NUMBER}' boguspart
152-
# python setup.py sdist bdist_wheel
153-
# twine upload -r testpypi --verbose --non-interactive dist/*
143+
144+
145+
publish-testpypi:
146+
name: Publish package to TestPyPI
147+
runs-on: ubuntu-22.04
148+
needs: [group-tests, notebook-tests]
149+
if: ${{ github.ref == 'refs/heads/develop' && github.event_name != 'pull_request' }}
150+
concurrency:
151+
group: publish-testpypi
152+
permissions:
153+
id-token: write
154+
environment:
155+
name: testpypi
156+
steps:
157+
- uses: actions/checkout@v4
158+
with:
159+
fetch-depth: 0
160+
lfs: true
161+
- name: Setup Python 3.9
162+
uses: ./.github/actions/python
163+
with:
164+
python_version: 3.9
165+
- name: Bump version and build package
166+
run: |
167+
set -x
168+
export BUILD_NUMBER=$GITHUB_RUN_NUMBER
169+
bump2version --no-tag --no-commit --verbose --serialize '{major}.{minor}.{patch}.{release}{$BUILD_NUMBER}' boguspart
170+
python setup.py sdist bdist_wheel
171+
- name: Publish to TestPyPI
172+
uses: pypa/gh-action-pypi-publish@release/v1
173+
with:
174+
repository-url: https://test.pypi.org/legacy/
175+
print-hash: true
176+
verbose: true

.github/workflows/publish.yaml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,13 @@ env:
2323
jobs:
2424
publish:
2525
runs-on: ubuntu-22.04
26+
environment:
27+
name: publish
28+
url: https://pypi.org/pydvl
2629
concurrency:
2730
group: publish
31+
permissions:
32+
id-token: write
2833
steps:
2934
- name: Checking out last commit in release
3035
if: ${{ github.event_name != 'workflow_dispatch' }}
@@ -54,6 +59,14 @@ jobs:
5459
# Make the version available as env variable for next steps
5560
echo CURRENT_VERSION=$CURRENT_VERSION >> $GITHUB_ENV
5661
shell: bash
62+
- name: Build dist
63+
run: |
64+
python setup.py sdist bdist_wheel
65+
- name: Publish to PyPI
66+
uses: pypa/gh-action-pypi-publish@release/v1
67+
with:
68+
verbose: true
69+
print-hash: true
5770
- name: Deploy Docs
5871
uses: ./.github/actions/deploy-docs
5972
with:
@@ -63,10 +76,3 @@ jobs:
6376
email: ${{ env.GITHUB_BOT_EMAIL }}
6477
username: ${{ env.GITHUB_BOT_USERNAME }}
6578
set-default: 'true'
66-
- name: Build and publish to PyPI
67-
env:
68-
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
69-
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
70-
run: |
71-
python setup.py sdist bdist_wheel
72-
twine upload --verbose --non-interactive dist/*

.github/workflows/run-notebook-tests-workflow.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ jobs:
2626
- name: Free Disk Space (Ubuntu)
2727
uses: jlumbroso/free-disk-space@main
2828
with:
29-
large-packages: false
29+
large-packages: true
3030
docker-images: false
3131
- uses: actions/checkout@v4
3232
with:
3333
fetch-depth: 0
34+
lfs: true
3435
- name: Setup Python ${{ inputs.python_version }}
3536
uses: ./.github/actions/python
3637
with:

.gitignore

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ celerybeat.pid
110110
.venv
111111
env/
112112
venv/
113-
venv38/
113+
venv39/
114114
ENV/
115115
env.bak/
116116
venv.bak/
@@ -141,9 +141,18 @@ pylint.html
141141
runs/
142142
data/models/
143143
*.pkl
144+
!data/*.pkl
144145

145146
# Docs
146147
docs_build
147148

148149
# pytest-profiling
149150
prof/
151+
152+
# JS tooling
153+
node_modules/
154+
package.json
155+
package-lock.json
156+
157+
#
158+
.serena

.test_durations

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,8 +1521,8 @@
15211521
"tests/valuation/methods/test_semivalues.py::test_coefficients[BetaShapleyValuation-kwargs1-10]": 0.0016590010000072652,
15221522
"tests/valuation/methods/test_semivalues.py::test_coefficients[BetaShapleyValuation-kwargs2-100]": 0.0022294990000091275,
15231523
"tests/valuation/methods/test_semivalues.py::test_coefficients[BetaShapleyValuation-kwargs2-10]": 0.003863207999984297,
1524-
"tests/valuation/methods/test_semivalues.py::test_coefficients[DataBanzhafValuation-kwargs3-100]": 0.001800666000065121,
1525-
"tests/valuation/methods/test_semivalues.py::test_coefficients[DataBanzhafValuation-kwargs3-10]": 0.0016530420000435697,
1524+
"tests/valuation/methods/test_semivalues.py::test_coefficients[BanzhafValuation-kwargs3-100]": 0.001800666000065121,
1525+
"tests/valuation/methods/test_semivalues.py::test_coefficients[BanzhafValuation-kwargs3-10]": 0.0016530420000435697,
15261526
"tests/valuation/methods/test_semivalues.py::test_coefficients[ShapleyValuation-kwargs4-100]": 0.0018769589999578784,
15271527
"tests/valuation/methods/test_semivalues.py::test_coefficients[ShapleyValuation-kwargs4-10]": 0.0016063749999375432,
15281528
"tests/valuation/methods/test_semivalues.py::test_msr_banzhaf[5]": 9.342398666999998,
@@ -1636,10 +1636,10 @@
16361636
"tests/valuation/scorers/test_classwise.py::test_classwise_scorer[test_data2-expected_scores2]": 0.0025690839999974457,
16371637
"tests/valuation/scorers/test_scorers.py::test_compose_score": 0.0019082069999996065,
16381638
"tests/valuation/scorers/test_scorers.py::test_scorer": 0.001976999999998341,
1639-
"tests/valuation/test_interface.py::test_data_banzhaf_valuation[1]": 0.0836418330000015,
1640-
"tests/valuation/test_interface.py::test_data_banzhaf_valuation[2]": 1.2780167490000025,
1641-
"tests/valuation/test_interface.py::test_data_beta_shapley_valuation[1]": 4.139234666999997,
1642-
"tests/valuation/test_interface.py::test_data_beta_shapley_valuation[2]": 3.603092916999998,
1639+
"tests/valuation/test_interface.py::test_banzhaf_valuation[1]": 0.0836418330000015,
1640+
"tests/valuation/test_interface.py::test_banzhaf_valuation[2]": 1.2780167490000025,
1641+
"tests/valuation/test_interface.py::test_beta_shapley_valuation[1]": 4.139234666999997,
1642+
"tests/valuation/test_interface.py::test_beta_shapley_valuation[2]": 3.603092916999998,
16431643
"tests/valuation/test_interface.py::test_shapley_valuation[1]": 0.27120083299999465,
16441644
"tests/valuation/test_interface.py::test_shapley_valuation[2]": 0.15037520699999618,
16451645
"tests/valuation/test_interface.py::test_data_utility_learning[1]": 0.026216332999993597,
@@ -1781,10 +1781,6 @@
17811781
"tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen-kwargs1-scorer0-0.2-2-0-21]": 6.573138832000012,
17821782
"tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen_antithetic-kwargs2-scorer0-0.2-2-0-21]": 10.124256999999972,
17831783
"tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[permutation_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 2.7115268339999545,
1784-
"tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-combinatorial_montecarlo-kwargs0]": 0.16786966001382098,
1785-
"tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen-kwargs1]": 17.011920137971174,
1786-
"tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen_antithetic-kwargs2]": 35.88025256394758,
1787-
"tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-4-group_testing-kwargs3]": 0.25901710899779573,
17881784
"tests/value/shapley/test_montecarlo.py::test_seed[combinatorial_montecarlo-kwargs0-test_game0]": 0.04085670800000685,
17891785
"tests/value/shapley/test_montecarlo.py::test_seed[group_testing-kwargs3-test_game0]": 0.23488145900003587,
17901786
"tests/value/shapley/test_montecarlo.py::test_seed[owen-kwargs1-test_game0]": 0.30296191700003305,

CHANGELOG.md

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,38 @@
22

33
## Unreleased
44

5+
### Added
6+
7+
- Support for `torch.Tensor` as underlying data type in `Dataset` and
8+
`GroupedDataset`
9+
[PR #673](https://github.com/aai-institute/pyDVL/pull/673)
10+
- Support for pytorch models in most valuation methods when wrapped in
11+
classes implementing the protocol `TorchSupervisedModel`, e.g. by using
12+
[skorch.NeuralNetClassifier](https://skorch.readthedocs.io/en/stable/classifier.html)
13+
models
14+
[PR #673](https://github.com/aai-institute/pyDVL/pull/673)
15+
16+
### Fixed
17+
18+
- Issues with `Dataset` indexing
19+
[PR #673](https://github.com/aai-institute/pyDVL/pull/673)
20+
21+
### Changed
22+
23+
24+
## v0.10.0 - 💥📚🐞🆕 New valuation interface, improved docs, new methods, breaking changes and tons of improvements
25+
526

627
### Added
728

29+
- Simple result serialization to resume computation of values
30+
[PR #666](https://github.com/aai-institute/pyDVL/pull/666)
31+
- Simple memory monitor / reporting
32+
[PR #663](https://github.com/aai-institute/pyDVL/pull/663)
33+
- New stopping criterion `MaxSamples`
34+
[PR #661](https://github.com/aai-institute/pyDVL/pull/661)
35+
- New pseudo-stopping criterion `History` to record value updates during fitting
36+
[PR #653](https://github.com/aai-institute/pyDVL/pull/653)
837
- Introduced `UtilityModel` and two implementations `IndicatorUtilityModel`
938
and `DeepSetsUtilityModel` for data utility learning
1039
[PR #650](https://github.com/aai-institute/pyDVL/pull/650)
@@ -25,6 +54,7 @@
2554
- Refactor MSR Banzhaf semivalues with the new sampler architecture.
2655
[PR #605](https://github.com/aai-institute/pyDVL/pull/605)
2756
[PR #641](https://github.com/aai-institute/pyDVL/pull/641)
57+
[PR #653](https://github.com/aai-institute/pyDVL/pull/653)
2858
- Refactor group-testing shapley values with new sampler architecture
2959
[PR #602](https://github.com/aai-institute/pyDVL/pull/602)
3060
- Refactor least-core data valuation methods with more supported sampling
@@ -56,8 +86,10 @@
5686

5787
### Fixed
5888

59-
- Fixed `show_warnings=False` not being respected in subprocesses
89+
- Fixed `show_warnings=False` not being respected in subprocesses. Introduced
90+
`suppress_warnings` decorator for more flexibility
6091
[PR #647](https://github.com/aai-institute/pyDVL/pull/647)
92+
[PR #662](https://github.com/aai-institute/pyDVL/pull/662)
6193
- Fixed several bugs in diverse stopping criteria, including: iteration counts,
6294
computing completion, resetting, nested composition
6395
[PR #641](https://github.com/aai-institute/pyDVL/pull/641)
@@ -83,6 +115,13 @@
83115

84116
### Changed
85117

118+
- Major rewrite of the documentation for the new interface in 0.10.0
119+
[PR #653](https://github.com/aai-institute/pyDVL/pull/653)
120+
- Slicing, comparing and setting of `ValuationResult` behave in a more
121+
natural and consistent way. Several bug fixes.
122+
[PR #653](https://github.com/aai-institute/pyDVL/pull/653)
123+
[PR #660](https://github.com/aai-institute/pyDVL/pull/660)
124+
[PR #666](https://github.com/aai-institute/pyDVL/pull/666)
86125
- Switched all semi-value coefficients and sampler weights to log-space in
87126
order to avoid overflows
88127
[PR #643](https://github.com/aai-institute/pyDVL/pull/643)
@@ -151,6 +190,8 @@
151190
- renaming `PreConditioner` -> `Preconditioner`
152191
- fit to `TensorOperator`
153192
[PR #601](https://github.com/aai-institute/pyDVL/pull/601)
193+
- Bumped `zarr` dependency to v3
194+
[PR #668](https://github.com/aai-institute/pyDVL/pull/668)
154195

155196

156197
## 0.9.2 - 🏗 Bug fixes, logging improvement

CLAUDE.local.md

Whitespace-only changes.

CONTRIBUTING.md

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ If you are interested in setting up a similar project, consider the template
1515

1616
## Local development
1717

18-
This project uses [black](https://github.com/psf/black) to format code and
18+
This project uses [ruff](https://github.com/astral-sh/ruff) to lint and format code and
1919
[pre-commit](https://pre-commit.com/) to invoke it as a git pre-commit hook.
20-
Consider installing any of [black's IDE
21-
integrations](https://black.readthedocs.io/en/stable/integrations/editors.html)
20+
Consider installing any of [ruff's IDE
21+
integrations](https://docs.astral.sh/ruff/editors/setup/)
2222
to make your life easier.
2323

2424
Run the following to set up the pre-commit git hook to run before pushes:
@@ -83,7 +83,7 @@ If you use remote execution, don't forget to exclude data paths from deployment
8383
## Testing
8484

8585
Automated builds, tests, generation of documentation and publishing are handled
86-
by [CI pipelines](#CI). Before pushing your changes to the remote we recommend
86+
by [CI pipelines](#ci). Before pushing your changes to the remote we recommend
8787
to execute `tox` locally in order to detect mistakes early on and to avoid
8888
failing pipelines. tox will:
8989
* run the test suite
@@ -297,6 +297,33 @@ the environment variable `DYLD_FALLBACK_LIBRARY_PATH`:
297297
export DYLD_FALLBACK_LIBRARY_PATH=$DYLD_FALLBACK_LIBRARY_PATH:/opt/homebrew/lib
298298
```
299299

300+
### Automatic API documentation
301+
302+
We use [mkdocstrings](https://mkdocstrings.github.io/) to automatically generate
303+
API documentation from docstrings, following almost verbatim [this
304+
recipe](https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages):
305+
Stubs are generated for all modules on the fly using
306+
[generate_api_docs.py](https://github.com/aai-institute/pyDVL/blob/develop/build_scripts/generate_api_docs.py) thanks to the plugin
307+
[mkdocs-gen-files](https://github.com/oprypin/mkdocs-gen-files) and
308+
navigation is generated for
309+
[mkdocs-literate-nav](https://github.com/oprypin/mkdocs-literate-nav).
310+
311+
With some renaming and using
312+
[section-index](https://github.com/oprypin/mkdocs-section-index) `__init__.py`
313+
files are used as entry points for the documentation of a module.
314+
315+
Since very often we re-export symbols in the `__init__.py` files, the automatic
316+
generation of the documentation skips **all** symbols in those files. If you
317+
want to document any in particular you can do so by **overriding
318+
mkdocs_genfiles**: Create a file under `docs/api/pydvl/module/index.md` and add
319+
your documentation there. For example, to document the whole module and every
320+
(re-)exported symbol just add this to the file:
321+
322+
```markdown
323+
::: pydvl.module
324+
```
325+
326+
300327
### Adding new pages
301328

302329
Navigation is configured in `mkdocs.yaml` using the nav section. We use the
@@ -388,7 +415,7 @@ To add a citation inside a markdown file, use the notation `[@ citekey]` (with
388415
no space). Alas,
389416
because of when mkdocs-bibtex enters the pipeline, it won't process docstrings.
390417
For module documentation, we manually inject html into the markdown files. For
391-
example, in `pydvl.value.shapley.montecarlo` we have:
418+
example, in `pydvl.valuation.methods.shapley` we have:
392419

393420
```markdown
394421
"""
@@ -441,7 +468,7 @@ use braces for legibility like in the first example.
441468
### Abbreviations
442469

443470
We keep the abbreviations used in the documentation inside the
444-
[docs_include/abbreviations.md](https://github.com/aai-institute/pyDVL/blob/develop/docs_includes%2Fabbreviations.md) file.
471+
[docs_includes/abbreviations.md](https://github.com/aai-institute/pyDVL/blob/develop/docs_includes/abbreviations.md) file.
445472

446473
The syntax for abbreviations is:
447474

@@ -649,8 +676,18 @@ part):
649676
```shell
650677
build_scripts/release-version.sh 0.1.6
651678
```
679+
This will:
680+
681+
- Pull latest remote version of develop (fast-forward only) from origin
682+
- Create a branch `release/v0.1.6`
683+
- Bump version number: 0.1.5.dev1234 ⟶ 0.1.6
684+
- Merge release branch into master locally and on origin
685+
- Tag as `v0.1.6`
686+
- Bump version number again to next development pre-release
687+
- Merge release branch into develop locally and on origin
688+
- Delete release branch
652689

653-
To find out how to use the script, pass the `-h` or `--help` flags:
690+
For usage details, pass the `-h` or `--help` flags:
654691

655692
```shell
656693
build_scripts/release-version.sh --help
@@ -660,11 +697,10 @@ If running in interactive mode (without `-y|--yes`), the script will output a
660697
summary of pending changes and ask for confirmation before executing the
661698
actions.
662699

663-
Once this is done, a tag will be created on the repository. You should then
664-
create a GitHub
700+
Once the script is done, you should then create a GitHub
665701
[release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release)
666-
for that tag. That will a trigger a CI pipeline that will automatically create a
667-
package and publish it from CI to PyPI.
702+
for the tag that was created. That will trigger a CI pipeline that will
703+
automatically create a package and publish it from CI to PyPI.
668704

669705
### Manual release process
670706

0 commit comments

Comments
 (0)