Skip to content

Commit 56e48dd

Browse files
committed
Bug category solved
1 parent f5e4d24 commit 56e48dd

File tree

1 file changed

+55
-54
lines changed

1 file changed

+55
-54
lines changed

PhenoFunctions_v4.py

Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,17 @@
1010
import umap
1111
import logging
1212
from flowsom import flowsom as flowsom
13+
import seaborn as sb
1314
import tempfile
15+
import matplotlib
16+
matplotlib.use('Agg')
1417
import matplotlib.pyplot as plt
1518
tmp = tempfile.NamedTemporaryFile()
1619
sc.settings.autoshow = False
1720
sc.settings.set_figure_params(dpi=300, facecolor='white',
1821
figsize=(10, 10))
1922
sc.settings.verbosity = 0
2023
warnings.filterwarnings("ignore", category=FutureWarning)
21-
import matplotlib
22-
matplotlib.use('Agg')
2324

2425

2526
class Cytophenograph:
@@ -46,9 +47,10 @@ def __init__(self, info_file, input_folder, output_folder, k_coef, marker_list,
4647
self.marker_array = None
4748
self.anndata_list = []
4849
self.outfig = None
49-
self.fileformat = "pdf"
50+
self.fileformat = "pdf" # insert svg to change figure format
5051
self.log = logging.getLogger()
5152
self.log.setLevel(logging.INFO)
53+
self.dpi = 100
5254
format = logging.Formatter("%(asctime)s %(threadName)-11s %(levelname)-10s %(message)s")
5355
#
5456
ch = logging.StreamHandler(sys.stdout)
@@ -129,40 +131,36 @@ def concatenate_dataframe(self,info_file, csv_list):
129131
for df in pandas_df_list]):
130132
try:
131133
for i in range(len(pandas_df_list)):
132-
# print(pandas_df_list[i].index[0][:-2]) sample id derivated
133134
# save column with Sample name in list
134135
Sample_list = info_file["Sample"].tolist()
135136
# check if Sample name are in the anndata index
136137
if pandas_df_list[i].index[0][:-2] in Sample_list:
137138
ann_tmp = anndata.AnnData(pandas_df_list[i])
138139
ann_tmp.obs['Sample'] = pandas_df_list[i].index[0][:-2]
139140
#
140-
cell_type = info_file['Cell_type'].loc[info_file['Sample']== pandas_df_list[i].index[0][:-2]]
141-
# ann_tmp.obs['Cell_type'] = cell_type.to_string().split(" ")[-1]
142-
ann_tmp.obs['Cell_type'] = ''.join(e for e in cell_type.to_string() if e.isalnum())
141+
cell_type = info_file['Cell_type'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
142+
ann_tmp.obs['Cell_type'] = ''.join(e for e in cell_type.to_string().split(" ")[-1] if e.isalnum())
143143
#
144-
exp = info_file['EXP'].loc[info_file['Sample']== pandas_df_list[i].index[0][:-2]]
145-
# ann_tmp.obs['EXP'] = exp.to_string().split(" ")[-1]
146-
ann_tmp.obs['EXP'] = ''.join(e for e in exp.to_string() if e.isalnum())
144+
exp = info_file['EXP'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
145+
ann_tmp.obs['EXP'] = ''.join(e for e in exp.to_string().split(" ")[-1] if e.isalnum())
147146
#
148-
id = info_file['ID'].loc[info_file['Sample']== pandas_df_list[i].index[0][:-2]]
149-
# ann_tmp.obs['ID'] = id.to_string().split(" ")[-1]
150-
ann_tmp.obs['ID'] = ''.join(e for e in id.to_string() if e.isalnum())
147+
id = info_file['ID'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
148+
ann_tmp.obs['ID'] = ''.join(e for e in id.to_string().split(" ")[-1] if e.isalnum())
151149
#
152150
time_point = info_file['Time_point'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
153-
ann_tmp.obs['Time_point'] = time_point.to_string().split(" ")[-1]
154-
ann_tmp.obs['Time_point'] = ''.join(e for e in time_point.to_string() if e.isalnum())
151+
#ann_tmp.obs['Time_point'] = time_point.to_string().split(" ")[-1]
152+
ann_tmp.obs['Time_point'] = ''.join(e for e in time_point.to_string().split(" ")[-1] if e.isalnum())
155153
#
154+
156155
condition = info_file['Condition'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
157-
# ann_tmp.obs['Condition'] = condition.to_string().split(" ")[-1]
158-
ann_tmp.obs['Condition'] = ''.join(e for e in condition.to_string() if e.isalnum())
156+
ann_tmp.obs['Condition'] = ''.join(e for e in condition.to_string().split(" ")[-1] if e.isalnum())
159157
#
160158
count = info_file['Count'].loc[info_file['Sample'] == pandas_df_list[i].index[0][:-2]]
161-
# ann_tmp.obs['Count'] = count.to_string().split(" ")[-1]
162-
ann_tmp.obs['Count'] = ''.join(e for e in count.to_string() if e.isalnum())
159+
ann_tmp.obs['Count'] = ''.join(e for e in count.to_string().split(" ")[-1] if e.isalnum())
163160
self.anndata_list.append(ann_tmp)
164161
else:
165-
self.log.error("Error, this file {0} is not in the column Sample of Infofile. \n Please check sample name and Infofile".format(pandas_df_list[i].index[0][:-2]))
162+
self.log.error("Error, this file {0} is not in the column Sample of Infofile. "
163+
"\n Please check sample name and Infofile".format(pandas_df_list[i].index[0][:-2]))
166164
sys.exit(1)
167165
tmp = self.anndata_list[0]
168166
self.anndata_list.pop(0)
@@ -172,11 +170,12 @@ def concatenate_dataframe(self,info_file, csv_list):
172170
else:
173171
self.adata = tmp.concatenate(self.anndata_list)
174172
self.adata.layers['raw_value'] = self.adata.X
175-
except (ValueError, Exception):
173+
except Exception as e:
176174
self.log.error("Error. Please check Info File Header or CSV header.")
175+
self.log.error("Exception - {0}\n".format(str(e)))
177176
sys.exit(1)
178177
else:
179-
self.log.error("Error. Please check Info File Header or CSV header.")
178+
self.log.error("Error. Please check Info File Header or CSV header.", exc_info=True)
180179
sys.exit(1)
181180
self.tmp_df = pd.DataFrame(self.adata.X, index=self.adata.obs.index)
182181
self.tmp_df.columns = self.adata.var_names
@@ -250,10 +249,6 @@ def plot_umap(self):
250249
palette=self.palette, legend_fontoutline=2, show=False, add_outline=False, frameon=False,
251250
legend_loc='on data', title="UMAP Plot",return_fig=False,
252251
s=50, save="_legend_on_data.".join(["".join([str(self.tool), "_cluster"]), self.fileformat]))
253-
sc.pl.umap(self.adata_subset, color="pheno_leiden",
254-
palette=self.palette, legend_fontoutline=2, show=False, add_outline=False, frameon=False,
255-
legend_loc='on data', title="UMAP Plot",return_fig=False,
256-
s=50, save="_legend_on_data.".join(["".join([str(self.tool), "_cluster"]), 'svg']))
257252
sc.pl.correlation_matrix(self.adata_subset, "pheno_leiden", show=False,
258253
save=".".join([self.tool, self.fileformat]))
259254
for _ in list(self.adata_subset.var_names.unique()):
@@ -272,17 +267,29 @@ def matrixplot(self):
272267
dendrogram=True, vmin=-2, vmax=2, cmap='RdBu_r', layer="scaled",
273268
show=False, swap_axes=False, return_fig=False,
274269
save=".".join(["matrixplot_mean_z_score", self.fileformat]))
275-
sc.pl.matrixplot(self.adata_subset, list(self.adata_subset.var_names), "pheno_leiden",
276-
dendrogram=True, vmin=-2, vmax=2, cmap='RdBu_r', layer="scaled",
277-
show=False, swap_axes=False, return_fig=False,
278-
save=".".join(["matrixplot_mean_z_score", 'svg']))
279270
sc.pl.matrixplot(self.adata_subset, list(self.adata_subset.var_names), "pheno_leiden",
280271
dendrogram=True, cmap='Blues', standard_scale='var',
281272
colorbar_title='column scaled\nexpression', layer="scaled",
282273
swap_axes=False, return_fig=False,
283274
show=False,
284275
save=".".join(["matrixplot_column_scaled_expression", self.fileformat]))
285276

277+
def plotdist(self):
    """
    Plot one histogram per marker and save the grid as a single figure.

    Reads ``self.adata`` (converted to a DataFrame with ``to_df()``) and
    writes ``MarkerHistograms.<self.fileformat>`` inside ``self.outfig``,
    rendered at ``self.dpi``.

    Returns:
        None
    """
    # DataFrame.hist returns an array of Axes (one per column), not a single
    # Axes, so the owning figure has to be taken from one of its elements.
    axes = self.adata.to_df().hist(bins=25, figsize=(20, 15))
    fig = axes.flat[0].get_figure()
    try:
        fig.savefig("/".join([self.outfig, ".".join(["MarkerHistograms", self.fileformat])]),
                    dpi=self.dpi, bbox_inches='tight', facecolor='white', transparent=True,
                    format=self.fileformat)
    finally:
        # Figures stay registered with pyplot until closed; release this one
        # so repeated calls do not accumulate open figures.
        plt.close(fig)
    # Disabled pair-plot variant, kept for reference (keyword spelling fixed;
    # NOTE(review): the file name is hard-coded to .pdf while format follows
    # self.fileformat - align them before re-enabling).
    # ax = sb.pairplot(self.adata.to_df(), plot_kws={'alpha': 0.3})
    # ax.fig.set_size_inches(20,20)
    # ax.savefig("/".join([self.outfig, "MarkerPairPlot.pdf"]),
    #            dpi=self.dpi, bbox_inches='tight',facecolor='white',transparent=True,
    #            format=self.fileformat)
292+
286293
def plot_frequency(self):
287294
"""
288295
@@ -296,16 +303,9 @@ def plot_frequency(self):
296303
ax1.set_ylabel("Cluster")
297304
ax1.grid(False)
298305
ax1.legend(bbox_to_anchor=(1.2, 1.0))
299-
if self.fileformat == "pdf":
300-
fig.savefig("/".join([self.outfig, "ClusterFrequencyNormalized.pdf"]),
301-
dpi=100, bbox_inches='tight',
302-
format=self.fileformat)
303-
fig.savefig("/".join([self.outfig, "ClusterFrequencyNormalized.svg"]),
304-
dpi=100, bbox_inches='tight',
305-
format='svg')
306-
else:
307-
fig.savefig("/".join([self.outfig, "ClusterFrequencyNormalized.svg"]),
308-
dpi=fig.dpi, bbox_inches='tight',format=self.fileformat)
306+
fig.savefig("/".join([self.outfig, ".".join(["ClusterFrequencyNormalized", self.fileformat])]),
307+
dpi=self.dpi, bbox_inches='tight',
308+
format=self.fileformat)
309309
fig, (ax2) = plt.subplots(1, 1, figsize=(17 / 2.54, 17 / 2.54))
310310
ax2 = self.adata_subset.obs.groupby("pheno_leiden")["Sample"].value_counts(normalize=False).unstack().plot.barh(stacked=True,
311311
legend=False,
@@ -315,13 +315,8 @@ def plot_frequency(self):
315315
ax2.set_ylabel("Cluster")
316316
ax2.grid(False)
317317
ax2.legend(bbox_to_anchor=(1.2, 1.0))
318-
if self.fileformat == "pdf":
319-
fig.savefig("/".join([self.outfig, "ClusterFrequencyNotNormalized.pdf"]),
320-
dpi=fig.dpi, bbox_inches='tight',
321-
format=self.fileformat)
322-
else:
323-
fig.savefig("/".join([self.outfig, "ClusterFrequencyNotNormalized.svg"]),
324-
dpi=fig.dpi, bbox_inches='tight',
318+
fig.savefig("/".join([self.outfig, ".".join(["ClusterFrequencyNotNormalized", self.fileformat])]),
319+
dpi=self.dpi, bbox_inches='tight',
325320
format=self.fileformat)
326321

327322
def runphenograph(self):
@@ -331,7 +326,8 @@ def runphenograph(self):
331326
"""
332327
self.log.info("Part2: Phenograph Clustering")
333328
self.log.info("Markers used for Phenograph clustering:")
334-
self.adata_subset = self.adata[:, self.markertoinclude].copy()
329+
self.adata_subset = self.adata[:,
330+
self.markertoinclude].copy()
335331
self.log.info(self.adata_subset.var_names)
336332
self.log.info("Markers excluded for Phenograph clustering:")
337333
self.log.info(self.marker_array)
@@ -351,10 +347,11 @@ def runphenograph(self):
351347
self.embedding = self.runumap()
352348
self.adata.obsm['X_umap'] = self.embedding
353349
self.adata_subset.obsm['X_umap'] = self.embedding
354-
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names, index=self.adata.obs.index)
350+
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names)
355351
self.tmp_df['UMAP_1'] = self.embedding[:, 0]
356352
self.tmp_df['UMAP_2'] = self.embedding[:, 1]
357-
self.tmp_df['Cluster_Phenograph'] = pd.DataFrame(self.adata.obs['Phenograph_cluster'])
353+
self.tmp_df['Cluster_Phenograph'] = self.adata_subset.obs['pheno_leiden']
354+
# self.plotdist()
358355
self.plot_umap()
359356
self.plot_frequency()
360357
self.matrixplot()
@@ -389,10 +386,11 @@ def runparc(self):
389386
self.embedding = self.runumap()
390387
self.adata.obsm['X_umap'] = self.embedding
391388
self.adata_subset.obsm['X_umap'] = self.embedding
392-
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names, index=self.adata.obs.index)
389+
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names)
393390
self.tmp_df['UMAP_1'] = self.embedding[:, 0]
394391
self.tmp_df['UMAP_2'] = self.embedding[:, 1]
395-
self.tmp_df['Cluster_Parc'] = pd.DataFrame(self.adata.obs['Parc_cluster'])
392+
self.tmp_df['Cluster_Parc'] = self.adata_subset.obs['pheno_leiden']
393+
# self.plotdist()
396394
self.plot_umap()
397395
self.plot_frequency()
398396
self.matrixplot()
@@ -447,10 +445,13 @@ def runflowsom(self):
447445
self.embedding = self.runumap()
448446
self.adata.obsm['X_umap'] = self.embedding
449447
self.adata_subset.obsm['X_umap'] = self.embedding
450-
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names, index=self.adata.obs.index)
448+
self.embedding = self.runumap()
449+
self.adata.obsm['X_umap'] = self.embedding
450+
self.adata_subset.obsm['X_umap'] = self.embedding
451+
self.tmp_df = pd.DataFrame(self.adata.X, columns=self.adata.var_names)
451452
self.tmp_df['UMAP_1'] = self.embedding[:, 0]
452453
self.tmp_df['UMAP_2'] = self.embedding[:, 1]
453-
self.tmp_df['Cluster_Flowsom'] = pd.DataFrame(self.adata.obs['Cluster_Flowsom'])
454+
# self.plotdist()
454455
self.plot_umap()
455456
self.plot_frequency()
456457
self.matrixplot()

0 commit comments

Comments
 (0)