Skip to content

Commit 49ee412

Browse files
enryH, ri-heme, Henry Webel, and mpielies
authored
✨ 🔀 Continuous perturbations
Add continuous feature perturbations. --------- Co-authored-by: Ricardo Hernández Medina <ricardo.hernandez.medina@sund.ku.dk> Co-authored-by: Henry Webel <henry.webel@cpr.ku.dk> Co-authored-by: Marc Pielies Avelli <mpielies@gmail.com>
1 parent 92bced0 commit 49ee412

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

48 files changed

+27935
-313
lines changed

.github/workflows/release.yaml

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,111 @@
1-
name: release on pypi
1+
name: CI
22
on:
33
push:
4-
branches:
5-
- main
4+
pull_request:
5+
# branches:
6+
# - main
7+
68
jobs:
9+
format:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
- uses: psf/black@stable
14+
lint:
15+
name: Lint with flake8
16+
runs-on: ubuntu-latest
17+
steps:
18+
- uses: actions/checkout@v4
19+
20+
- uses: actions/setup-python@v5
21+
with:
22+
python-version: "3.11"
23+
- name: Install flake8
24+
run: pip install flake8 flake8-bugbear
25+
- name: Lint with flake8
26+
run: flake8 src
27+
run-tutorial:
28+
name: Run tutorial - random_small
29+
runs-on: ubuntu-latest
30+
steps:
31+
- uses: actions/checkout@v4
32+
- uses: actions/setup-python@v5
33+
with:
34+
python-version: "3.11"
35+
- name: Install dependencies
36+
run: pip install .
37+
- name: Prepare tutorial data
38+
run: |
39+
cd tutorial
40+
move-dl data=random_small task=encode_data --cfg job
41+
move-dl data=random_small task=encode_data
42+
- name: Train model and analyze latent space
43+
run: |
44+
cd tutorial
45+
move-dl data=random_small task=random_small__latent --cfg job
46+
move-dl data=random_small task=random_small__latent
47+
- name: Identify associations - t-test
48+
run: |
49+
cd tutorial
50+
move-dl data=random_small task=random_small__id_assoc_ttest --cfg job
51+
move-dl data=random_small task=random_small__id_assoc_ttest task.training_loop.num_epochs=30 task.num_refits=4
52+
- name: Identify associations - bayes factors
53+
run: |
54+
cd tutorial
55+
move-dl data=random_small task=random_small__id_assoc_bayes --cfg job
56+
move-dl data=random_small task=random_small__id_assoc_bayes task.training_loop.num_epochs=30 task.num_refits=20
57+
run-tutorial-cont:
58+
name: Run tutorial - random_continuous
59+
runs-on: ubuntu-latest
60+
steps:
61+
- uses: actions/checkout@v4
62+
- uses: actions/setup-python@v5
63+
with:
64+
python-version: "3.11"
65+
- name: Install dependencies
66+
run: pip install .
67+
- name: Prepare tutorial data
68+
run: |
69+
cd tutorial
70+
move-dl data=random_continuous task=encode_data
71+
- name: Train model and analyze latent space
72+
run: |
73+
cd tutorial
74+
move-dl data=random_continuous task=random_continuous__latent --cfg job
75+
move-dl data=random_continuous task=random_continuous__latent
76+
- name: Identify associations - t-test
77+
run: |
78+
cd tutorial
79+
move-dl data=random_continuous task=random_continuous__id_assoc_ttest --cfg job
80+
move-dl data=random_continuous task=random_continuous__id_assoc_ttest task.training_loop.num_epochs=30 task.num_refits=4
81+
- name: Identify associations - bayes factors
82+
run: |
83+
cd tutorial
84+
move-dl data=random_continuous task=random_continuous__id_assoc_bayes --cfg job
85+
move-dl data=random_continuous task=random_continuous__id_assoc_bayes task.training_loop.num_epochs=30 task.num_refits=4
86+
- name: Identify associations - KS
87+
run: |
88+
cd tutorial
89+
move-dl data=random_continuous task=random_continuous__id_assoc_ks --cfg job
90+
move-dl data=random_continuous task=random_continuous__id_assoc_ks task.training_loop.num_epochs=30 task.num_refits=4
91+
792
publish:
893
name: Publish package
994
runs-on: ubuntu-latest
95+
if: startsWith(github.ref, 'refs/tags')
96+
needs:
97+
- format
98+
- lint
1099
steps:
11-
- uses: actions/checkout@v3
12-
- name: Publish package
13-
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
100+
- uses: actions/checkout@v4
101+
- uses: actions/setup-python@v5
102+
with:
103+
python-version: "3.11"
104+
- name: Install twine and build
105+
run: python -m pip install --upgrade twine build
106+
- name: Build
107+
run: python -m build
108+
- name: Publish package
14109
uses: pypa/gh-action-pypi-publish@release/v1
15110
with:
16111
user: __token__

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ tutorial/*
4040
!tutorial/notebooks/*.ipynb
4141
!tutorial/README.md
4242

43+
# Supplementary files
44+
supplementary_files/*.png
45+
supplementary_files/*.tsv
46+
supplementary_files/*.txt
47+
4348
# Virtual environment
4449
venv/
4550
virtualvenv/
@@ -48,6 +53,12 @@ virtualvenv/
4853
docs/build/
4954
docs/source/_templates/
5055

56+
# VS Code settings
57+
.vscode
58+
59+
# macOS
60+
.DS_Store
61+
5162
# Root folder
5263
/*.*
5364
!/.gitignore
@@ -58,3 +69,4 @@ docs/source/_templates/
5869
!/pyproject.toml
5970
!/requirements.txt
6071
!/setup.cfg
72+
!/.github

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Medication data
4242

4343
## Installing MOVE package
4444

45-
MOVE is written in Python and can therefore be installed using `pip`:
45+
MOVE is written in Python and can be installed using `pip`:
4646

4747
```bash
4848
>>> pip install move-dl
@@ -78,11 +78,11 @@ MOVE has five-six steps:
7878
## How to run MOVE
7979

8080
Please refer to our [**documentation**](https://move-dl.readthedocs.io/) for
81-
examples and [tutorials](https://move-dl.readthedocs.io/tutorial/index.html)
81+
examples and [tutorials](https://move-dl.readthedocs.io/tutorial/index.html)
8282
on how to run MOVE.
8383

84-
Additionally, you can copy
85-
[this notebook](https://colab.research.google.com/drive/1RFWNsuGymCmppPsElBvDuA9zRbGskKmi?usp=sharing)
84+
Additionally, you can copy
85+
[this notebook](https://colab.research.google.com/drive/1RFWNsuGymCmppPsElBvDuA9zRbGskKmi?usp=sharing)
8686
and follow its instructions to get familiar with our pipeline.
8787

8888
# Data sets

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
html_theme = "sphinx_rtd_theme"
4040
html_theme_options = {
41-
"collapse_navigation" : False,
41+
"collapse_navigation": False,
4242
}
4343
html_static_path = []
4444

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
hydra-core>=1.2.0
2-
numpy>=1.21.5
2+
numpy>=1.21.5,<2
33
pandas>=1.4.2
44
torch>=1.11.0
55
matplotlib>=3.5.2

setup.cfg

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
[metadata]
22
name = move-dl
33
description = Multi-omics variational autoencoder
4+
long_description = file: README.md
5+
long_description_content_type = text/markdown
46
url = https://github.com/RasmussenLab/MOVE
57
classifiers =
68
Intended Audience :: Healthcare Industry
@@ -15,13 +17,13 @@ version = attr: move.__version__
1517
include_package_data = True
1618
install_requires =
1719
hydra-core
18-
numpy
20+
numpy<2
1921
pandas
2022
torch
2123
matplotlib
2224
seaborn
2325
scikit-learn
24-
scipy
26+
scipy>=1.10.0
2527

2628
package_dir =
2729
= src
@@ -34,3 +36,8 @@ where = src
3436
[options.entry_points]
3537
console_scripts =
3638
move-dl=move.__main__:main
39+
40+
[flake8]
41+
max-line-length = 88
42+
aggressive = 2
43+
extend-ignore = E203

src/move/__init__.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from __future__ import annotations
22

3-
__license__ = "MIT"
4-
__version__ = (1, 4, 10)
5-
__all__ = ["conf", "data", "models", "training_loop", "VAE"]
6-
73
HYDRA_VERSION_BASE = "1.2"
84

9-
from move import conf, data, models
10-
from move.models.vae import VAE
11-
from move.training.training_loop import training_loop
5+
from move import conf, data, models # noqa:E402
6+
from move.models.vae import VAE # noqa:E402
7+
from move.training.training_loop import training_loop # noqa:E402
8+
9+
__license__ = "MIT"
10+
__version__ = (1, 5, 0)
11+
__all__ = ["conf", "data", "models", "training_loop", "VAE"]

src/move/analysis/metrics.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,21 @@ def norm(x: np.ma.MaskedArray, axis: int = 1) -> np.ma.MaskedArray:
8181
1D array with the specified axis removed.
8282
"""
8383
return np.sqrt(np.sum(x**2, axis=axis))
84+
85+
86+
def get_2nd_order_polynomial(
87+
x_array: FloatArray, y_array: FloatArray, n_points=100
88+
) -> tuple[FloatArray, FloatArray, tuple[float, float, float]]:
89+
"""
90+
Given a set of x and y values, find the 2nd order polynomial best fitting the data.
91+
92+
Returns:
93+
x_pol: x coordinates for the polynomial function evaluation.
94+
y_pol: y coordinates for the polynomial function evaluation.
95+
"""
96+
a2, a1, a = np.polyfit(x_array, y_array, deg=2)
97+
98+
x_pol = np.linspace(np.min(x_array), np.max(x_array), n_points)
99+
y_pol = np.array([a2 * x * x + a1 * x + a for x in x_pol])
100+
101+
return x_pol, y_pol, (a2, a1, a)

src/move/conf/main.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ hydra:
2020
job:
2121
config:
2222
override_dirname:
23+
item_sep: ";"
2324
exclude_keys:
2425
- experiment
2526

src/move/conf/schema.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ class InputConfig:
2828
name: str
2929
weight: int = 1
3030

31+
3132
@dataclass
3233
class ContinuousInputConfig(InputConfig):
3334
scale: bool = True
35+
log2: bool = False
3436

3537

3638
@dataclass
@@ -185,6 +187,27 @@ class IdentifyAssociationsTTestConfig(IdentifyAssociationsConfig):
185187
num_latent: list[int] = MISSING
186188

187189

190+
@dataclass
191+
class IdentifyAssociationsKSConfig(IdentifyAssociationsConfig):
192+
"""Configure the Kolmogorov-Smirnov approach to identify associations.
193+
194+
Args:
195+
perturbed_feature_names: names of the perturbed features of interest.
196+
target_feature_names: names of the target features of interest.
197+
198+
Description:
199+
For each perturbed feature - target feature pair, we will plot:
200+
- Input vs. reconstruction correlation plot: to assess reconstruction
201+
quality of both target and perturbed features.
202+
- Distribution of reconstruction values for the target feature before
203+
and after the perturbation of the perturbed feature.
204+
205+
"""
206+
207+
perturbed_feature_names: list[str] = field(default_factory=list)
208+
target_feature_names: list[str] = field(default_factory=list)
209+
210+
188211
@dataclass
189212
class MOVEConfig:
190213
defaults: list[Any] = field(default_factory=lambda: [dict(data="base_data")])
@@ -237,6 +260,11 @@ def extract_names(configs: list[InputConfig]) -> list[str]:
237260
name="identify_associations_ttest_schema",
238261
node=IdentifyAssociationsTTestConfig,
239262
)
263+
cs.store(
264+
group="task",
265+
name="identify_associations_ks_schema",
266+
node=IdentifyAssociationsKSConfig,
267+
)
240268

241269
# Register custom resolvers
242270
OmegaConf.register_new_resolver("weights", extract_weights)

src/move/conf/task/identify_associations_bayes.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@ training_loop:
3232
- 25
3333
early_stopping: false
3434
patience: 0
35+
36+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
defaults:
2+
- identify_associations_ks_schema
3+
4+
model:
5+
categorical_weights: ${weights:${data.categorical_inputs}}
6+
continuous_weights: ${weights:${data.continuous_inputs}}
7+
num_hidden:
8+
- 100
9+
num_latent: 50
10+
beta: 0.1
11+
dropout: 0.1
12+
cuda: false
13+
14+
training_loop:
15+
lr: 1e-4
16+
num_epochs: 200
17+
batch_dilation_steps:
18+
- 50
19+
- 100
20+
- 150
21+
kld_warmup_steps:
22+
- 15
23+
- 20
24+
- 25
25+
early_stopping: false
26+
patience: 0

src/move/conf/task/identify_associations_ttest.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@ training_loop:
3535
- 25
3636
early_stopping: false
3737
patience: 0
38+
39+

0 commit comments

Comments
 (0)