Skip to content

Commit 8008cfd

Browse files
committed
Release GSEA 0.14.0-rc
1 parent debf894 commit 8008cfd

15 files changed

+50
-126
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
name = "GSEA"
22
uuid = "f74a9ba8-6f1f-48f2-98b6-a79c46cc06f7"
33
authors = ["KwatMDPhD <kwat.me@icloud.com>"]
4-
version = "0.12.0"
4+
version = "0.14.0-rc"
55

66
[deps]
77
BioLab = "1fe83854-81c0-42f7-afc9-71ba9af673ca"

README.md

Lines changed: 1 addition & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -52,22 +52,6 @@ head -2 output/*.tsv
5252
open output/plot/*.html
5353
```
5454

55-
#### Alternatively, (instead of in command line) run this example in `julia`
56-
57-
```jl
58-
using GSEA
59-
60-
cd("example.sarcopenia")
61-
62-
GSEA.metric_rank(
63-
"metric_rank.json",
64-
"target_x_sample_x_number.tsv",
65-
"gene_x_sample_x_score.tsv",
66-
"set_genes.json",
67-
"output",
68-
)
69-
```
70-
7155
## Settings are just a [`.json` file](setting)
7256

7357
- `metric` for ranking genes (for `metric-rank`)
@@ -100,7 +84,7 @@ GSEA.metric_rank(
10084

10185
- `algorithm` for computing enrichment
10286

103-
`cidac` (_cumulative information divergence with antisymmetricity and complementation_) | `ks` (_Kolmogorov Smirnov_) | `ksa` (`ks` area)
87+
`KS` (_Kolmogorov Smirnov_) | `KSA` (`KS` area) | `KL` (cumulative information divergence) | `SKL` (_symmetric_ cumulative information divergence) | `AKL` (_antisymmetric_ cumulative information divergence)
10488

10589
- `number_of_jobs`
10690

example.sarcopenia/metric_rank.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
"minimum_gene_set_size": 15,
77
"maximum_gene_set_size": 500,
88
"exponent": 1.0,
9-
"algorithm": "ks",
9+
"algorithm": "KS",
1010
"number_of_jobs": 1,
1111
"permutation": "sample",
1212
"random_seed": 20150603,

setting/data_rank.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,6 @@
33
"minimum_gene_set_size": 15,
44
"maximum_gene_set_size": 500,
55
"exponent": 1.0,
6-
"algorithm": "cidac",
6+
"algorithm": "AKL",
77
"number_of_jobs": 1
88
}

setting/metric_rank.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
"minimum_gene_set_size": 15,
77
"maximum_gene_set_size": 500,
88
"exponent": 1.0,
9-
"algorithm": "cidac",
9+
"algorithm": "AKL",
1010
"number_of_jobs": 1,
1111
"permutation": "sample",
1212
"random_seed": 20150603,

setting/user_rank.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
"minimum_gene_set_size": 15,
66
"maximum_gene_set_size": 500,
77
"exponent": 1.0,
8-
"algorithm": "cidac",
8+
"algorithm": "AKL",
99
"number_of_jobs": 1,
1010
"random_seed": 20150603,
1111
"number_of_permutations": 100,

src/_filter_set!.jl

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,9 @@
11
function _filter_set!(se_fe_, re, in_, mi, ma)
22

3-
#
43
println("Before filtering sets")
54

65
BioLab.Dict.print(se_fe_, 0)
76

8-
#
97
if re
108

119
println("Removing set genes not found in gene-x-sample genes")
@@ -18,7 +16,6 @@ function _filter_set!(se_fe_, re, in_, mi, ma)
1816

1917
end
2018

21-
#
2219
println("Keeping sets: $mi <= size <= $ma")
2320

2421
for (se, fe_) in se_fe_
@@ -31,7 +28,6 @@ function _filter_set!(se_fe_, re, in_, mi, ma)
3128

3229
end
3330

34-
#
3531
println("After")
3632

3733
BioLab.Dict.print(se_fe_, 0)

src/_plot_mountain.jl

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,15 @@
11
function _plot_mountain(se_x_st_x_nu, fe, sc, n_ex, pl_, al, fe_, sc_, se_fe_, sy_ar, di)
22

3-
#
43
n_se = size(se_x_st_x_nu, 1)
54

6-
#
75
n_ex = min(n_ex, n_se)
86

97
co_ = [1, 2]
108

11-
#
129
for ro in 1:n_ex
1310

1411
se, en = se_x_st_x_nu[ro, co_]
1512

16-
#
1713
if en <= 0 && !(se in pl_)
1814

1915
push!(pl_, se)
@@ -22,7 +18,6 @@ function _plot_mountain(se_x_st_x_nu, fe, sc, n_ex, pl_, al, fe_, sc_, se_fe_, s
2218

2319
end
2420

25-
#
2621
for ro in n_se:-1:(n_se - n_ex + 1)
2722

2823
se, en = se_x_st_x_nu[ro, co_]
@@ -35,26 +30,14 @@ function _plot_mountain(se_x_st_x_nu, fe, sc, n_ex, pl_, al, fe_, sc_, se_fe_, s
3530

3631
end
3732

38-
#
3933
pl = mkpath(joinpath(di, "plot"))
4034

41-
#
4235
pop!(sy_ar, :n_jo)
4336

44-
if al == "cidac"
45-
46-
fu = BioLab.FeatureSetEnrichment.score_set_new
47-
48-
elseif al == "ks"
49-
50-
fu = BioLab.FeatureSetEnrichment.score_set
51-
52-
end
53-
54-
#
5537
for se in pl_
5638

57-
fu(
39+
BioLab.FeatureSetEnrichment.score_set(
40+
al,
5841
fe_,
5942
sc_,
6043
se_fe_[se];

src/_tabulate_statistic.jl

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,21 @@
11
function _tabulate_statistic(se_en, se_ra_, ou)
22

3-
#
43
se_ = collect(keys(se_en))
54

65
en_ = collect(values(se_en))
76

8-
#
97
mkpath(ou)
108

11-
#
129
if isempty(se_ra_)
1310

14-
#
1511
gl_ = gla_ = fill(NaN, length(se_))
1612

1713
else
1814

19-
#
2015
ra__ = [collect(values(se_ra)) for se_ra in se_ra_]
2116

22-
#
2317
gl_, gla_ = BioLab.Significance.get_p_value_and_adjust(en_, vcat(ra__...))
2418

25-
#
2619
se_x_ra_x_en = DataFrame("Set" => se_)
2720

2821
insertcols!(se_x_ra_x_en, (string(id) => ra_ for (id, ra_) in enumerate(ra__))...)
@@ -31,7 +24,6 @@ function _tabulate_statistic(se_en, se_ra_, ou)
3124

3225
end
3326

34-
#
3527
se_x_st_x_nu = sort(
3628
DataFrame(
3729
"Set" => se_,

src/data_rank.jl

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,10 @@ Run data-rank (single-sample) GSEA.
1010
"""
1111
@cast function data_rank(setting_json, gene_x_sample_x_score_tsv, set_genes_json, output_directory)
1212

13-
#
1413
ke_ar = BioLab.Dict.read(setting_json)
1514

16-
#
1715
fe_x_sa_x_sc = BioLab.Table.read(gene_x_sample_x_score_tsv)
1816

19-
#
2017
se_fe_ = BioLab.Dict.read(set_genes_json)
2118

2219
_filter_set!(
@@ -27,11 +24,10 @@ Run data-rank (single-sample) GSEA.
2724
ke_ar["maximum_gene_set_size"],
2825
)
2926

30-
#
3127
se_x_sa_x_en = BioLab.FeatureSetEnrichment.score_set(
28+
ke_ar["algorithm"],
3229
fe_x_sa_x_sc,
3330
se_fe_;
34-
al = ke_ar["algorithm"],
3531
_make_keyword_argument(ke_ar)...,
3632
)
3733

src/metric_rank.jl

Lines changed: 10 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -25,18 +25,15 @@ Run metric-rank (standard) GSEA.
2525
output_directory,
2626
)
2727

28-
#
2928
ke_ar = BioLab.Dict.read(setting_json)
3029

31-
#
3230
ta_, sat_, ta_x_sa_x_nu =
3331
BioLab.DataFrame.separate(BioLab.Table.read(target_x_sample_x_number_tsv))[[2, 3, 4]]
3432

3533
BioLab.Array.error_duplicate(ta_)
3634

3735
BioLab.Matrix.error_bad(ta_x_sa_x_nu, Real)
3836

39-
#
4037
fe_, saf_, fe_x_sa_x_sc =
4138
BioLab.DataFrame.separate(BioLab.Table.read(gene_x_sample_x_score_tsv))[[2, 3, 4]]
4239

@@ -46,10 +43,8 @@ Run metric-rank (standard) GSEA.
4643

4744
fe_x_sa_x_sc = fe_x_sa_x_sc[:, indexin(sat_, saf_)]
4845

49-
#
5046
mkpath(output_directory)
5147

52-
#
5348
bi_ = BitVector(ta_x_sa_x_nu[1, :])
5449

5550
me = ke_ar["metric"]
@@ -61,7 +56,6 @@ Run metric-rank (standard) GSEA.
6156
DataFrame("Gene" => fe_, me => sc_),
6257
)
6358

64-
#
6559
se_fe_ = BioLab.Dict.read(set_genes_json)
6660

6761
_filter_set!(
@@ -72,13 +66,12 @@ Run metric-rank (standard) GSEA.
7266
ke_ar["maximum_gene_set_size"],
7367
)
7468

75-
#
69+
al = ke_ar["algorithm"]
70+
7671
fe = ke_ar["feature_name"]
7772

7873
sc = ke_ar["score_name"]
7974

80-
al = ke_ar["algorithm"]
81-
8275
sy_ar = _make_keyword_argument(ke_ar)
8376

8477
pe = ke_ar["permutation"]
@@ -91,31 +84,23 @@ Run metric-rank (standard) GSEA.
9184

9285
pl_ = ke_ar["gene_sets_to_plot"]
9386

94-
#
9587
if pe == "sample"
9688

97-
#
98-
fu, id = BioLab.FeatureSetEnrichment._match_algorithm(al)
99-
100-
se_en = Dict(se => en[id] for (se, en) in fu(fe_, sc_, se_fe_; sy_ar...))
89+
se_en = BioLab.FeatureSetEnrichment.score_set(al, fe_, sc_, se_fe_; sy_ar...)
10190

102-
#
10391
if 0 < n_pe
10492

10593
println("Permuting $(pe)s to compute significance")
10694

107-
#
10895
seed!(ra)
10996

110-
#
11197
se_ra_ = [
112-
Dict(se => en[id] for (se, en) in se_en) for se_en in (
113-
fu(
114-
_compare_and_sort(shuffle!(bi_), fe_x_sa_x_sc, me, fe_)...,
115-
se_fe_;
116-
sy_ar...,
117-
) for _ in ProgressBar(1:n_pe)
118-
)
98+
BioLab.FeatureSetEnrichment.score_set(
99+
al,
100+
_compare_and_sort(shuffle!(bi_), fe_x_sa_x_sc, me, fe_)...,
101+
se_fe_;
102+
sy_ar...,
103+
) for _ in ProgressBar(1:n_pe)
119104
]
120105

121106
else
@@ -124,7 +109,6 @@ Run metric-rank (standard) GSEA.
124109

125110
end
126111

127-
#
128112
se_x_st_x_nu = _tabulate_statistic(se_en, se_ra_, output_directory)
129113

130114
_plot_mountain(
@@ -143,12 +127,10 @@ Run metric-rank (standard) GSEA.
143127

144128
se_x_st_x_nu
145129

146-
#
147130
elseif pe == "set"
148131

149-
user_rank(fe_, sc_, se_fe_, fe, sc, al, sy_ar, ra, n_pe, n_ex, pl_, output_directory)
132+
user_rank(al, fe_, sc_, se_fe_, fe, sc, sy_ar, ra, n_pe, n_ex, pl_, output_directory)
150133

151-
#
152134
else
153135

154136
error("`permutation` is not `sample` or `set`.")

0 commit comments

Comments
 (0)