
Commit 0557289

Merge pull request #145 from jhiemstrawisc/add-spras-image
Add new SPRAS image and handle unpacked singularity images
2 parents ad4da94 + 6cee79f commit 0557289

28 files changed: +444 −29 lines changed

.github/workflows/test-spras.yml

Lines changed: 11 additions & 1 deletion

```diff
@@ -84,6 +84,7 @@ jobs:
           docker pull reedcompbio/allpairs:v2
           docker pull reedcompbio/domino:latest
           docker pull reedcompbio/py4cytoscape:v2
+          docker pull reedcompbio/spras:v0.1.0
       - name: Build Omics Integrator 1 Docker image
         uses: docker/build-push-action@v1
         with:
@@ -156,6 +157,15 @@ jobs:
           tags: v2
           cache_froms: reedcompbio/py4cytoscape:latest
           push: false
+      - name: Build SPRAS Docker image
+        uses: docker/build-push-action@v1
+        with:
+          path: .
+          dockerfile: docker-wrappers/SPRAS/Dockerfile
+          repository: reedcompbio/spras
+          tags: v0.1.0
+          cache_froms: reedcompbio/spras:v0.1.0
+          push: false
 
   # Run pre-commit checks on source files
   pre-commit:
@@ -167,6 +177,6 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.8' # Match this to the version specified in environment.yml
+          python-version: '3.11' # Match this to the version specified in environment.yml
       - name: Run pre-commit checks
         uses: pre-commit/action@v3.0.0
```

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -3,7 +3,7 @@
 # See https://pre-commit.com/ for documentation
 default_language_version:
   # Match this to the version specified in environment.yml
-  python: python3.8
+  python: python3.11
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0 # Use the ref you want to point at
```
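To reproduce the same check locally before pushing, the standard pre-commit invocation works (assuming `pre-commit` is installed in your environment):

```
pre-commit run --all-files
```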

Snakefile

Lines changed: 1 addition & 1 deletion

```diff
@@ -219,7 +219,7 @@ rule reconstruct:
         # Create a copy so that the updates are not written to the parameters logfile
         params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
         # Add the input files
-        params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input})))
+        params.update(dict(zip(runner.get_required_inputs(wildcards.algorithm), *{input}, strict=True)))
         # Add the output file
         # All run functions can accept a relative path to the output file that should be written that is called 'output_file'
         params['output_file'] = output.pathway_file
```
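The `strict=True` flag (Python 3.10+) makes `zip` raise a `ValueError` when its arguments have different lengths instead of silently truncating, so a mismatch between an algorithm's declared inputs and the files Snakemake supplies now fails loudly. A minimal illustration with hypothetical input names:

```python
required = ["network", "prizes"]  # inputs an algorithm declares
provided = ["network.txt"]        # one file is missing

# Plain zip silently drops the unmatched entry
print(dict(zip(required, provided)))  # {'network': 'network.txt'}

# strict=True turns the mismatch into an error (Python 3.10+)
try:
    dict(zip(required, provided, strict=True))
except ValueError as err:
    print(err)  # zip() argument 2 is shorter than argument 1
```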

config/config.yaml

Lines changed: 8 additions & 0 deletions

```diff
@@ -7,6 +7,14 @@ hash_length: 7
 # 'singularity'. If container_framework is not specified, SPRAS will default to docker.
 container_framework: docker
 
+# Only used if container_framework is set to singularity, this will unpack the singularity containers
+# to the local filesystem. This is useful when PRM containers need to run inside another container,
+# such as would be the case in an HTCondor/OSPool environment.
+# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way
+# that persists after the workflow is complete. To clean up the unpacked containers, the user must
+# manually delete them.
+unpack_singularity: false
+
 # Allow the user to configure which container registry containers should be pulled from
 # Note that this assumes container names are consistent across registries, and that the
 # registry being passed doesn't require authentication for pull actions
```
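For intuition, "unpacking" a Singularity/Apptainer image corresponds roughly to converting the image into a plain sandbox directory on disk, something along these lines (illustrative commands with an example image, not SPRAS's exact implementation):

```
# Build a writable sandbox directory from a container image
apptainer build --sandbox allpairs_v2/ docker://reedcompbio/allpairs:v2

# Execute a command from the unpacked directory; running from a directory
# avoids the image mounts that may be unavailable inside another container
apptainer exec allpairs_v2/ cat /etc/os-release
```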

docker-wrappers/SPRAS/Dockerfile

Lines changed: 16 additions & 0 deletions

```dockerfile
FROM almalinux:9

RUN dnf install -y epel-release

# gcc/g++ are required for building several of the packages if you're using apple silicon
RUN dnf update -y && \
    dnf install -y gcc gcc-c++ \
    python3.11 python3.11-pip python3.11-devel \
    docker apptainer

COPY / /spras/
RUN chmod -R 777 /spras
WORKDIR /spras

# Install spras into the container
RUN pip3.11 install .
```
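After building (see the README below), a quick smoke test is to confirm that the `snakemake` entrypoint pulled in by `pip` is available; a suggested check, not part of the CI:

```
docker run --rm reedcompbio/spras:v0.1.0 snakemake --version
```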

docker-wrappers/SPRAS/README.md

Lines changed: 81 additions & 0 deletions

# SPRAS Docker image

## Building

A Docker image for SPRAS is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/spras).
This image comes bundled with all of the necessary software packages to run SPRAS, and can be used for execution in distributed environments (like HTCondor).

To create the Docker image, make sure you are in this repository's root directory, and from your terminal run:

```
docker build -t <project name>/<image name>:<tag name> -f docker-wrappers/SPRAS/Dockerfile .
```

For example, to build this image with the intent of pushing it to DockerHub as reedcompbio/spras:v0.1.0, you'd run:

```
docker build -t reedcompbio/spras:v0.1.0 -f docker-wrappers/SPRAS/Dockerfile .
```

This will copy the entire SPRAS repository into the container and install SPRAS with `pip`. As such, any changes you've made to the current SPRAS repository will be reflected in the version of SPRAS installed in the container. Since SPRAS is installed with `pip`, it's also possible to install the optional development modules. If you're using the container for development and you want the optional `pre-commit` and `pytest` packages, as well as a spras package that picks up source changes without re-installation, change the `pip` installation line to:

```
pip install -e .[dev]
```

This will cause changes to the spras source code to update the installed package.
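In the Dockerfile shown above, that amounts to swapping the final `RUN` line; a sketch of the development variant:

```dockerfile
# Development build: editable install plus the optional dev extras
RUN pip3.11 install -e .[dev]
```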
**Note:** This image builds for the platform native to your system (i.e. amd64 or arm64). If you need to run it in a remote environment like HTCondor that is almost certainly `amd64`, but you're building from Apple Silicon, it is recommended to either modify the Dockerfile to pin the platform:

```
FROM --platform=linux/amd64 almalinux:9
```

Or to temporarily override your system's default during the build, prepend your build command with:

```
DOCKER_DEFAULT_PLATFORM=linux/amd64
```

For example, to build reedcompbio/spras:v0.1.0 on Apple Silicon as a linux/amd64 container, you'd run:

```
DOCKER_DEFAULT_PLATFORM=linux/amd64 docker build -t reedcompbio/spras:v0.1.0 -f docker-wrappers/SPRAS/Dockerfile .
```
## Testing

The folder `docker-wrappers/SPRAS` also contains several files that can be used to test this container on HTCondor. To test the `spras` container in this environment, first log in to an HTCondor Access Point (AP). Then, from the AP, clone this repo:

```
git clone https://github.com/Reed-CompBio/spras.git
```

When you're ready to run SPRAS as an HTCondor workflow, navigate to the `spras/docker-wrappers/SPRAS` directory and create the `logs/` directory. Then run `condor_submit spras.sub`, which will submit SPRAS to HTCondor as a single job with as many cores as indicated by the `NUM_PROCS` line in `spras.sub`, using the value of `EXAMPLE_CONFIG` as the SPRAS configuration file. Note that you can alter the configuration file to test various workflows, but you should leave `unpack_singularity: true`, or the job is likely to be unsuccessful. By default, `example_config.yaml` runs everything except `cytoscape`, which appears to fail periodically in HTCondor.
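For orientation only, a container-universe submit file along these lines might look like the sketch below. This is an illustration, not the actual `spras.sub` in this directory: the `NUM_PROCS` and `EXAMPLE_CONFIG` names come from the description above, and every other line is an assumption about a typical HTCondor setup.

```
# Hypothetical sketch of spras.sub; consult the real file in this directory
NUM_PROCS = 4
EXAMPLE_CONFIG = example_config.yaml

universe = container
container_image = docker://reedcompbio/spras:v0.1.0

executable = spras.sh
arguments = --cores $(NUM_PROCS) --configfile $(EXAMPLE_CONFIG)

log = logs/spras_$(Cluster).log
output = logs/spras_$(Cluster).out
error = logs/spras_$(Cluster).err

request_cpus = $(NUM_PROCS)
queue
```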
To monitor the state of the job, you can run `condor_q` for a snapshot of how the job is doing, or you can run `condor_watch_q` if you want real-time updates. Upon completion, the `output` directory from the workflow should be returned as `spras/docker-wrappers/SPRAS/output`, along with several files containing the workflow's logging information (anything that matches `logs/spras_*` and ends in `.out`, `.err`, or `.log`). If the job was unsuccessful, these files should contain useful debugging clues about what may have gone wrong.

**Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild this image against the version of SPRAS you want to test, and push the image to your image repository. To use that container in the workflow, change the `container_image` line of `spras.sub` to point to the new image.

**Note**: In some cases, especially if you're encountering an error like `/srv//spras.sh: line 10: snakemake: command not found`, it may be necessary to convert the SPRAS image to a `.sif` container image before running someplace like the OSPool. To do this, run:

```
apptainer build spras.sif docker://reedcompbio/spras:v0.1.0
```

to produce the file `spras.sif`. Then, substitute this value as the `container_image` in the submit file.

## Versions

The version of this image matches the version of the spras package within it.
- v0.1.0: Created an image with SPRAS as an installed python module. This makes SPRAS runnable anywhere with Docker/Singularity. Note that the Snakefile should be runnable from any directory within the container.
docker-wrappers/SPRAS/example_config.yaml

Lines changed: 151 additions & 0 deletions

```yaml
# Global workflow control

# The length of the hash used to identify a parameter combination
hash_length: 7

# Specify the container framework. Current supported versions include 'docker' and
# 'singularity'. If container_framework is not specified, SPRAS will default to docker.
container_framework: singularity

# Unpack singularity. See config/config.yaml for details.
unpack_singularity: true

# Allow the user to configure which container registry containers should be pulled from
# Note that this assumes container names are consistent across registries, and that the
# registry being passed doesn't require authentication for pull actions
container_registry:
  base_url: docker.io
  # The owner or project of the registry
  # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
  owner: reedcompbio

# This list of algorithms should be generated by a script which checks the filesystem for installs.
# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved)
# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change
# which algorithms are run in a given experiment.
#
# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple
# parameters are specified then the algorithm will be run as many times as needed to cover all parameter
# combinations. For instance if we have the following:
# - name: "myAlg"
#   params:
#     include: true
#     a: [1,2]
#     b: [0.5,0.75]
#
# then myAlg will be run on (a=1,b=0.5), (a=1,b=0.75), (a=2,b=0.5), and (a=2,b=0.75). Pretty neat, but be
# careful: too many parameters might make your runs take a long time.

algorithms:
  - name: "pathlinker"
    params:
      include: false
      run1:
        k: range(100,201,100)

  - name: "omicsintegrator1"
    params:
      include: true
      run1:
        r: [5]
        b: [5, 6]
        w: np.linspace(0,5,2)
        g: [3]
        d: [10]

  - name: "omicsintegrator2"
    params:
      include: true
      run1:
        b: [4]
        g: [0]
      run2:
        b: [2]
        g: [3]

  - name: "meo"
    params:
      include: true
      run1:
        max_path_length: [3]
        local_search: ["Yes"]
        rand_restarts: [10]

  - name: "mincostflow"
    params:
      include: true
      run1:
        flow: [1] # The flow must be an int
        capacity: [1]

  - name: "allpairs"
    params:
      include: true

  - name: "domino"
    params:
      include: true
      run1:
        slice_threshold: [0.3]
        module_threshold: [0.05]


# Here we specify which pathways to run and other file location information.
# DataLoader.py can currently only load a single dataset
# Assume that if a dataset label does not change, the lists of associated input files do not change
datasets:
  -
    label: data0
    node_files: ["node-prizes.txt", "sources.txt", "targets.txt"]
    # DataLoader.py can currently only load a single edge file, which is the primary network
    edge_files: ["network.txt"]
    # Placeholder
    other_files: []
    # Relative path from the spras directory
    data_dir: "input"
  # -
  #   label: data1
  #   # Reuse some of the same sources file as 'data0' but different network and targets
  #   node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
  #   edge_files: ["alternative-network.txt"]
  #   other_files: []
  #   # Relative path from the spras directory
  #   data_dir: "input"

# If we want to reconstruct then we should set run to true.
# TODO: if include is true above but run is false here, algs are not run.
# is this the behavior we want?
reconstruction_settings:

  #set where everything is saved
  locations:

    #place the save path here
    # TODO move to global
    reconstruction_dir: "output"

  run: true

analysis:
  # Create one summary per pathway file and a single summary table for all pathways for each dataset
  summary:
    include: true
  # Create output files for each pathway that can be visualized with GraphSpace
  graphspace:
    include: true
  # Create Cytoscape session file with all pathway graphs for each dataset
  cytoscape:
    include: false
  # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
  ml:
    include: true
    # specify how many principal components to calculate
    components: 2
    # boolean to show the labels on the pca graph
    labels: true
    # 'ward', 'complete', 'average', 'single'
    # if linkage: ward, must use metric: euclidean
    linkage: 'ward'
    # 'euclidean', 'manhattan', 'cosine'
    metric: 'euclidean'
```
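The parameter-combination behavior described in the config comments is a plain Cartesian product over each parameter's value list; a small illustration of the `myAlg` example above (not SPRAS's actual implementation):

```python
from itertools import product

params = {"a": [1, 2], "b": [0.5, 0.75]}

# One run per combination of parameter values
for combo in product(*params.values()):
    print(dict(zip(params.keys(), combo)))
# {'a': 1, 'b': 0.5}, {'a': 1, 'b': 0.75}, {'a': 2, 'b': 0.5}, {'a': 2, 'b': 0.75}
```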

docker-wrappers/SPRAS/spras.sh

Lines changed: 10 additions & 0 deletions

```bash
#!/bin/bash

# Fail early if there's an issue
set -e

# When .cache files are created, they need to know where HOME is to write there.
# In this case, that should be the HTCondor scratch dir the job is executing in.
export HOME=$(pwd)

snakemake "$@"
```
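Because the script forwards all of its arguments to `snakemake`, the submit file's arguments line (or a local invocation) can pass any Snakemake flags; for example, an illustrative call using the example config from this directory:

```
./spras.sh --cores 4 --configfile example_config.yaml
```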
