Skip to content

Commit 99e20b0

Browse files
authored
MEA with custom kinase-substrate sets (#42)
* Adding the kinase-substrate sets to the results * Adding a function to run MEA with custom kinase-substrate sets * Fixing misspelling * Converting kinase name in custom sets to uppercase for consistency * Fixing description misspelling * Bumping version
1 parent 4a43c5b commit 99e20b0

File tree

2 files changed

+86
-8
lines changed

2 files changed

+86
-8
lines changed

src/kinase_library/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#%%
2525

26-
__version__ = "1.2.0"
26+
__version__ = "1.3.0"
2727

2828
#%% Loading scored phosphoproteome one time per session
2929

src/kinase_library/enrichment/mea.py

Lines changed: 85 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ def mea(self, kin_type, kl_method, kl_thresh,
136136
137137
Returns
138138
-------
139-
enrichemnt_results : pd.DataFrame
140-
pd.Dataframe with results of MEA for the specified KL method and threshold.
139+
enrichment_results : pd.DataFrame
140+
pd.DataFrame with results of MEA for the specified KL method and threshold.
141141
"""
142142

143143
exceptions.check_kl_method(kl_method)
@@ -197,11 +197,86 @@ def mea(self, kin_type, kl_method, kl_thresh,
197197
enrichment_data['FDR'] = enrichment_data['FDR'].replace(0,enrichment_data['FDR'][enrichment_data['FDR'] != 0].min()).astype(float) #Setting FDR of zero to lowest FDR in data
198198
sorted_enrichment_data = enrichment_data.sort_values('Kinase').set_index('Kinase').reindex(data.get_kinase_list(kin_type, non_canonical=non_canonical))
199199

200-
enrichemnt_results = MeaEnrichmentResults(enrichment_results=sorted_enrichment_data, pps_data=self, gseapy_obj=prerank_results,
200+
enrichment_results = MeaEnrichmentResults(enrichment_results=sorted_enrichment_data, pps_data=self, kin_sub_sets=kin_sub_sets, gseapy_obj=prerank_results,
201201
kin_type=kin_type, kl_method=kl_method, kl_thresh=kl_thresh, tested_kins=kinases,
202202
data_att=data_att, kl_comp_direction=kl_comp_direction)
203203

204-
return enrichemnt_results
204+
return enrichment_results
205+
206+
207+
def mea_custom(self, custom_kin_sets,
               kinases=None, kin_type='custom',
               weight=1, threads=4, min_size=1, max_size=100000,
               permutation_num=1000, seed=112123,
               gseapy_verbose=False):
    """
    Kinase enrichment analysis based on pre-ranked GSEA substrates list using custom kinase-substrate sets.

    Parameters
    ----------
    custom_kin_sets : dict
        A dictionary of custom kinase-substrate sets where keys are kinase names and values are lists of substrates.
        Kinase names are converted to uppercase, so they must be unique regardless of case.
    kinases : str or list, optional
        If provided, kinase enrichment will only be calculated for the specified kinases from custom_kin_sets
        (matched case-insensitively). The default is None, which uses all kinases in custom_kin_sets.
    kin_type : str, optional
        A label to identify the type of custom kinase sets being used. The default is 'custom'.
    **GSEApy parameters: weight, threads, min_size, max_size, permutation_num, seed, gseapy_verbose

    Returns
    -------
    enrichment_results : MeaEnrichmentResults
        Results object holding the MEA results dataframe for the custom kinase-substrate sets,
        the kinase-substrate sets that were used, and the underlying GSEApy object.

    Raises
    ------
    ValueError
        If custom_kin_sets is not a non-empty dictionary, if two of its keys are the same kinase
        name up to letter case, or if none of the requested kinases is present in custom_kin_sets.
    """

    if not isinstance(custom_kin_sets, dict) or not custom_kin_sets:
        raise ValueError('custom_kin_sets must be a non-empty dictionary with kinase names as keys and substrate lists as values.')

    # Kinase names are normalized to uppercase for consistency. Two keys that differ only by case
    # would collapse into one entry and silently discard a substrate set, so this is rejected explicitly.
    upper_kin_sets = {}
    for kin_name, substrates in custom_kin_sets.items():
        upper_name = kin_name.upper()
        if upper_name in upper_kin_sets:
            raise ValueError('custom_kin_sets contains more than one entry for kinase {} (kinase names are case-insensitive).'.format(upper_name))
        upper_kin_sets[upper_name] = substrates

    if kinases is None:
        requested_kins = list(upper_kin_sets)
    elif isinstance(kinases, str):
        requested_kins = [kinases.upper()]
    else:
        requested_kins = [x.upper() for x in kinases]

    requested_lookup = set(requested_kins)
    filtered_kin_sets = {k: v for k, v in upper_kin_sets.items() if k in requested_lookup}
    if not filtered_kin_sets:
        raise ValueError('No kinases from the provided list were found in custom_kin_sets.')

    # Only kinases that actually have a substrate set are passed on as tested (request order is kept),
    # so that tested_kins agrees with kin_sub_sets.
    tested_kins = [kin for kin in requested_kins if kin in filtered_kin_sets]

    # Substrates ranked from highest to lowest value of the ranking column, indexed by sequence.
    ranked_subs = self.dp_data_pps.data.set_index(_global_vars.default_seq_col)[self.rank_col].sort_values(ascending=False)

    prerank_results = gp.prerank(rnk=ranked_subs,
                                 gene_sets=filtered_kin_sets,
                                 weight=weight,
                                 threads=threads,
                                 min_size=min_size,
                                 max_size=max_size,
                                 permutation_num=permutation_num,
                                 seed=seed,
                                 verbose=gseapy_verbose)

    res_col_converter = {'Term': 'Kinase', 'ES': 'ES', 'NES': 'NES', 'NOM p-val': 'p-value', 'FDR q-val': 'FDR', 'Tag %': 'Subs fraction', 'Lead_genes': 'Leading substrates'}

    enrichment_data = prerank_results.res2d.drop(['Name', 'FWER p-val', 'Gene %'], axis=1).rename(columns=res_col_converter)
    enrichment_data['p-value'] = enrichment_data['p-value'].replace(0,1/permutation_num).astype(float) #Setting p-value of zero to 1/(# of permutations)

    # Setting FDR of zero to lowest non-zero FDR in data. If every FDR is zero there is no such
    # value (the minimum of an empty selection is NaN), so 1/(# of permutations) is used instead.
    nonzero_fdr = enrichment_data['FDR'][enrichment_data['FDR'] != 0].dropna()
    fdr_floor = 1/permutation_num if nonzero_fdr.empty else nonzero_fdr.min()
    enrichment_data['FDR'] = enrichment_data['FDR'].replace(0,fdr_floor).astype(float)

    sorted_enrichment_data = enrichment_data.sort_values('Kinase').set_index('Kinase')

    enrichment_results = MeaEnrichmentResults(
        enrichment_results=sorted_enrichment_data,
        pps_data=self,
        kin_sub_sets=filtered_kin_sets,
        gseapy_obj=prerank_results,
        kin_type=kin_type,
        kl_method='custom',
        kl_thresh=None,
        tested_kins=tested_kins,
        data_att='custom',
        kl_comp_direction=None
        )

    return enrichment_results
205280

206281
#%%
207282

@@ -215,6 +290,8 @@ class MeaEnrichmentResults(object):
215290
Dataframe containing Kinase Library enrichment results.
216291
pps_data : kl.EnrichmentData
217292
Object initialized from the foreground and background dataframes used to calculate provided enrichment_results.
293+
kin_sub_sets : dict
294+
Kinase-substrate sets used for the enrichment.
218295
kin_type : str
219296
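Kinase type ('ser_thr' or 'tyrosine'), or a user-defined label (default 'custom') when the results come from custom kinase-substrate sets.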
220297
kl_method : str
@@ -231,12 +308,13 @@ class MeaEnrichmentResults(object):
231308
Dictates if kinases above or below the specified threshold are used ('higher','lower').
232309
"""
233310

234-
def __init__(self, enrichment_results, pps_data, gseapy_obj,
235-
kin_type, kl_method, kl_thresh, tested_kins,
236-
data_att, kl_comp_direction):
311+
def __init__(self, enrichment_results, pps_data, kin_sub_sets,
312+
gseapy_obj, kin_type, kl_method, kl_thresh,
313+
tested_kins, data_att, kl_comp_direction):
237314

238315
self.enrichment_results = enrichment_results
239316
self.pps_data = pps_data
317+
self.kin_sub_sets = kin_sub_sets
240318
self.gseapy_obj = gseapy_obj
241319
self.kin_type = kin_type
242320
self.kl_method = kl_method

0 commit comments

Comments
 (0)