Skip to content

Commit 46c8deb

Browse files
authored
Merge pull request #43 from databio/dev
Version 1.1.0
2 parents cf720d8 + a803ab2 commit 46c8deb

25 files changed

+11700
-10386
lines changed

.gitignore

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,22 @@
1-
examples/sh_output*
2-
examples/py_output*
31
*.pyc
4-
site/
2+
3+
# BED files
4+
bedshifted*
55
*.bed
66
!tests/test.bed
7-
.DS_Store
7+
!tests/test2.bed
8+
!tests/small_test.bed
9+
!tests/small_test2.bed
10+
!tests/from_file.bed
11+
!tests/header_test.bed
12+
examples/sh_output*
13+
examples/py_output*
14+
15+
# Python build
16+
site/
817
build/
918
bedshift.egg-info/
1019
dist/
11-
bedshifted*
20+
21+
# MacOS
22+
.DS_Store

README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ Install from local repository: `pip install .`
88

99
## Command line
1010

11-
Run with: `bedshift -l hg38.chrom.sizes -b BEDFILE`
11+
Run with:
12+
13+
```
14+
bedshift -l tests/hg38.chrom.sizes -b tests/test.bed
15+
```
1216

1317
See `bedshift -h` for parameters.
1418

@@ -18,10 +22,10 @@ See `bedshift -h` for parameters.
1822
import bedshift
1923

2024
bedshifter = bedshift.Bedshift('tests/test.bed', 'tests/hg38.chrom.sizes')
21-
bedshifter.all_perturbations(addrate=0.3, addmean=320.0, addstdev=20.0,
22-
shiftrate=0.3, shiftmean=-10.0, shiftstdev=120.0,
23-
cutrate=0.1,
24-
mergerate=0.11,
25+
bedshifter.all_perturbations(addrate=0.3, addmean=320.0, addstdev=20.0,
26+
shiftrate=0.3, shiftmean=-10.0, shiftstdev=120.0,
27+
cutrate=0.1,
28+
mergerate=0.11,
2529
droprate=0.03)
2630
# can also run single operations: shift, add, cut, merge, drop
2731

bedplot.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
library("data.table")
2+
library("ggplot2")
3+
4+
# This script produces visualizations of bedshift results (perturbed bed files) using R.
5+
6+
# Load in the files and process them, returning a table with all the regions.
7+
bedshiftread = function(startfile, randfiles){
	# Read the original file and every perturbed file with fread() and stack
	# them into one data.table.
	#
	# startfile: path to the original (unperturbed) file
	# randfiles: character vector of paths to the perturbed files
	#
	# Returns the combined table with two extra columns:
	#   fileid - index of the source file; randfiles come first, in the order
	#            given, and startfile is always last (fileid == nfiles)
	#   file   - "original" for rows read from startfile, "random" otherwise
	files = c(randfiles, startfile)
	nfiles = length(files)
	regionslist = lapply(files, fread)
	rowsperfile = sapply(regionslist, NROW)
	# fill=TRUE lets files with differing column counts be stacked.
	regionstable = rbindlist(regionslist, fill=TRUE)
	regionstable[, fileid := rep(seq_len(nfiles), rowsperfile)]
	regionstable[, file := "random"]
	# Pick the start file's rows by fileid instead of a computed row range:
	# seq(from=N+1-k, to=N) counts backwards (N+1, N) when the start file
	# contributes k = 0 rows, which would mislabel or overrun the table.
	regionstable[fileid == nfiles, file := "original"]
	return(regionstable)
}
20+
21+
# Plot the results of the bedshiftread function
22+
bedshiftplot = function(regionstable) {
	# Draw every region in the table returned by bedshiftread() as a
	# rectangle: one horizontal track per fileid, coloured by the `file`
	# column. Returns the ggplot object.
	#
	# V2 and V3 are the auto-generated names of the 2nd and 3rd columns --
	# presumably the BED start/end coordinates; confirm against the inputs.
	region_aes = aes(xmin=V2, xmax=V3, ymin=fileid, ymax=fileid+0.75, fill=file)
	# The y axis only separates tracks, so its text and ticks are hidden.
	no_y_axis = theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())

	fig = ggplot(regionstable, region_aes)
	fig = fig + geom_rect() + theme_classic()
	fig = fig + scale_fill_manual(values=c("black", "gray"))
	fig = fig + xlab("Genome") + ylab("Files")
	fig = fig + no_y_axis
	return(fig)
}
32+
33+
# Provide the original file (the one that's being perturbed)
34+
# and the filenames of all randomized files.
35+
36+
# Run the randomization with a command like this:
37+
# bedshift --verbosity 5 -b tests/simple_1.bed -d .3 -l tests/chrom_sizes_1 -r 10
38+
39+
# Original file and the ten replicate outputs (rep1_... to rep10_...) that the
# plot compares against it.
startfile = "tests/simple_1.bed"
randfiles = paste0("rep", 1:10, "_bedshifted_simple_1.bed")

pdf("drop_H.pdf", width=6, height=2)
regionstable = bedshiftread(startfile, randfiles)
# Print explicitly: a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen when this script is run through source(),
# which would leave the PDF empty.
print(bedshiftplot(regionstable))
dev.off()

bedshift/BedshiftYAMLHandler.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import os
2+
import sys
3+
import yaml
4+
import logging
5+
6+
7+
class BedshiftYAMLHandler(object):
    """
    Run the Bedshift perturbations listed in a YAML config file.

    The config holds a ``bedshift_operations`` list. Each entry is a mapping
    made of one operation name plus exactly the settings that operation
    needs; entries are applied in the order they are listed.
    """

    # Operation name -> the settings it requires, in the positional order the
    # matching Bedshift method is called with. Operations that take a "file"
    # additionally accept an optional trailing "delimiter".
    _OPERATION_SETTINGS = (
        ("add", ("rate", "mean", "stdev")),
        ("add_from_file", ("file", "rate")),
        ("drop", ("rate",)),
        ("drop_from_file", ("file", "rate")),
        ("shift", ("rate", "mean", "stdev")),
        ("shift_from_file", ("file", "rate", "mean", "stdev")),
        ("cut", ("rate",)),
        ("merge", ("rate",)),
    )

    def __init__(self, bedshifter, yaml_fp, logger=None):
        """
        Handles Bedshift perturbations from yaml files

        :param bedshift.Bedshift bedshifter: a Bedshift object
        :param str yaml_fp: path to yaml file
        :param logging.logger logger: logger object; when omitted, the
            "BedshiftYAMLHandler" logger is used
        """
        self.bedshifter = bedshifter
        self.yaml_fp = yaml_fp
        if logger is not None:
            self._LOGGER = logger
        else:
            self._LOGGER = logging.getLogger("BedshiftYAMLHandler")

    def _print_sample_config(self):
        # The docstring below is the sample config itself and is logged
        # verbatim. It is a raw string so that "\t" is shown as the two
        # characters a user should type rather than as an invisible tab.
        r"""
        bedshift_operations:
          - add:
            rate: 0.1
            mean: 100
            stdev: 20
          - drop_from_file:
            file: tests/test.bed
            rate: 0.1
            delimiter: \t
          - shift_from_file:
            file: bedshifted_test.bed
            rate: 0.3
            mean: 100
            stdev: 200
          - add_from_file:
            file: tests/small_test.bed
            rate: 0.2
          - cut:
            rate: 0.2
          - drop:
            rate: 0.30
          - shift:
            rate: 0.05
            mean: 100
            stdev: 200
          - merge:
            rate: 0.15
        """
        self._LOGGER.info(self._print_sample_config.__doc__)

    def _read_from_yaml(self, fp):
        """
        Load and return the parsed contents of the yaml file.

        :param str fp: path to yaml file
        :return: the parsed config data
        """
        with open(fp, "r") as yaml_file:
            # NOTE(review): FullLoader can build more than plain scalars and
            # containers; if configs may come from untrusted sources,
            # consider switching to yaml.safe_load.
            config_data = yaml.load(yaml_file, Loader=yaml.FullLoader)
        self._LOGGER.info("Loaded configuration settings from {}".format(fp))
        return config_data

    def _plan_operation(self, operation):
        """
        Match one config entry against the known operations.

        An entry matches when its keys are exactly the operation name plus
        that operation's settings (plus "delimiter" for file operations).

        :param dict operation: one entry of ``bedshift_operations``
        :return: ``(method_name, positional_args)`` or None if nothing matches
        """
        if not isinstance(operation, dict):
            return None
        keys = set(operation)
        for name, settings in self._OPERATION_SETTINGS:
            expected = set(settings)
            expected.add(name)
            args = None
            if keys == expected:
                args = [operation[setting] for setting in settings]
            elif "file" in settings and keys == expected | {"delimiter"}:
                args = [operation[setting] for setting in settings]
                args.append(operation["delimiter"])
            if args is not None:
                return name, args
        return None

    def handle_yaml(self):
        """
        Performs perturbations provided in the yaml config file in the order they were provided.

        Logs an error and exits with status 1 if an entry does not match any
        known operation, or if a file named by a ``*_from_file`` operation
        does not exist.

        :return: sum of the values returned by the Bedshift methods called
        """
        data = self._read_from_yaml(self.yaml_fp)
        num_changed = 0

        for operation in data["bedshift_operations"]:
            plan = self._plan_operation(operation)
            if plan is None:
                self._LOGGER.error(
                    "\n\nInvalid settings entered in the config file. Please refer to the example below.\n\n"
                )
                self._print_sample_config()
                sys.exit(1)

            name, args = plan
            if "file" in operation:
                # Fail early with a clear message instead of letting the
                # Bedshift method hit a missing file.
                fp = operation["file"]
                if not os.path.isfile(fp):
                    self._LOGGER.error("File '{}' does not exist.".format(fp))
                    sys.exit(1)

            num_changed += getattr(self.bedshifter, name)(*args)

        return num_changed

bedshift/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.1.0-dev"
1+
__version__ = "1.1.0"

0 commit comments

Comments
 (0)