Skip to content

Commit e5a61b3

Browse files
committed
fixes for running snap
1 parent b1ab055 commit e5a61b3

File tree

7 files changed

+87
-42
lines changed

7 files changed

+87
-42
lines changed

CHANGES

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ CHANGES
77
------
88

99
major changes to neoepitope routines
10-
changes to app outputs
10+
some changes to app outputs
1111
added netmhcpan predictor
1212
added basicmhc1 predictor
1313
updated notebook examples

epitopepredict/app.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@ def __init__(self, opts={}):
2626
def setup(self):
2727
"""Setup main parameters"""
2828

29-
if check_snap() == True:
30-
add_path()
29+
#if base.check_snap() == True:
30+
#add_path()
3131
pd.set_option('display.width', 120)
3232
#override base.defaults entries if provided in conf
3333
set_defaults(self.__dict__)
@@ -69,6 +69,8 @@ def setup(self):
6969
self.names=None
7070
else:
7171
self.names = self.names.split(',')
72+
if self.names is not None:
73+
print ('selected sequences:', self.names)
7274

7375
if not os.path.exists(self.path) and self.path != '':
7476
os.mkdir(self.path)
@@ -359,13 +361,6 @@ def list_alleles():
359361
print ()
360362
return
361363

362-
def check_snap():
363-
"""Check if inside a snap"""
364-
365-
if 'SNAP_COMMON' in os.environ:
366-
return True
367-
return False
368-
369364
def add_path():
370365
"""Add home dir to path for accessing tools from a snap"""
371366

@@ -435,7 +430,9 @@ def main():
435430
help="Analysis path", metavar="FILE")
436431
parser.add_option("-n", "--neoepitope", dest="neoepitope", action="store_true",
437432
default=False, help="Neo-epitope pipeline")
438-
parser.add_option("-s", "--server", dest="server", action="store_true",
433+
parser.add_option("-e", "--ensembl", dest="ensembl", action="store_true",
434+
default=False, help="Get ensembl files for a release")
435+
parser.add_option("-s", "--server", dest="server",
439436
default=False, help="Run web app")
440437
parser.add_option("-x", "--port", dest="port", default=8000,
441438
help="Port for web app, default 8000")
@@ -467,7 +464,11 @@ def main():
467464
elif opts.neoepitope == True:
468465
if opts.test == True:
469466
neo.test_run()
467+
#neo.varcode_test()
470468
else:
469+
print (options)
470+
release = options['ensembl_release']
471+
neo.check_ensembl(release)
471472
W = neo.NeoEpitopeWorkFlow(options)
472473
st = W.setup()
473474
if st == True:

epitopepredict/base.py

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,26 @@ def split_peptides(df,length=9,seqkey='sequence',newcol='peptide'):
402402
res = df.merge(res,on=seqkey)
403403
return res
404404

405+
def check_snap():
    """Check if inside a snap.

    Returns:
        True when the SNAP_COMMON environment variable is present (a short
        message is printed in that case), otherwise False.
    """

    if 'SNAP_COMMON' in os.environ:
        print ('running in a snap')
        return True
    return False
412+
413+
def set_netmhcpan_cmd():
    """Setup the netmhcpan command for using inside snap. Avoids using
    tcsh script.

    Sets the NETMHCpan and TMPDIR environment variables as a side effect.

    Returns:
        path to the netMHCpan binary below /home/<user>/tools
    """

    import getpass
    #USER is not guaranteed to be defined, so fall back to getpass
    #instead of failing with a KeyError
    user = os.environ.get('USER') or getpass.getuser()
    toolspath = os.path.join('/home', user, 'tools')
    netmhcpath = os.path.join(toolspath, 'netMHCpan-4.0/Linux_x86_64')
    os.environ['NETMHCpan'] = netmhcpath
    os.environ['TMPDIR'] = '/tmp'
    cmd = os.path.join(netmhcpath, 'bin/netMHCpan')
    print ('netmhcpan cmd set to:', cmd)
    return cmd
424+
405425
class DataFrameIterator:
406426
"""Simple iterator to get dataframes from a path out of memory"""
407427
def __init__(self, files):
@@ -924,6 +944,8 @@ def _predict_sequences(self, recs, path=None, overwrite=True, alleles=[], length
924944

925945
results = []
926946
self.length = length
947+
if compression == '':
948+
compression = None
927949
for i,row in recs.iterrows():
928950
seq = row.translation
929951
seq = clean_sequence(seq) #clean the sequence of non-aa characters
@@ -1180,6 +1202,10 @@ def __init__(self, data=None, scoring='affinity'):
11801202
self.rankascending = 0
11811203
#load precalculated quantiles for sample peptides
11821204
self.qf = self.get_quantile_data()
1205+
self.basecmd = 'netMHCpan'
#base command needs to be to the binary directly if running snap
#check_snap has to be called here: the bare function object is never
#identical to True, so without the call this branch could not run
if check_snap():
    self.basecmd = set_netmhcpan_cmd()
11831209

11841210
def read_result(self, temp):
11851211
"""Read raw results from netMHCpan output"""
@@ -1225,9 +1251,9 @@ def predict(self, peptides, allele='HLA-A*01:01', name='temp',
12251251
f.write(p+'\n')
12261252
f.close()
12271253
if self.scoring =='affinity':
1228-
cmd = 'netMHCpan -BA -f %s -inptype 1 -a %s' %(pepfile , allele)
1254+
cmd = '%s -BA -f %s -inptype 1 -a %s' %(self.basecmd, pepfile , allele)
12291255
else:
1230-
cmd = 'netMHCpan -f %s -inptype 1 -a %s' %(pepfile , allele)
1256+
cmd = '%s -f %s -inptype 1 -a %s' %(self.basecmd, pepfile , allele)
12311257
if show_cmd is True:
12321258
print (cmd)
12331259
try:

epitopepredict/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@
5959
'iedb_mhc1_method':'IEDB_recommended',
6060
'iedb_mhc2_method':'IEDB_recommended'}
6161

62-
baseoptions['neopredict'] = {'vcf_files':'',
62+
baseoptions['neopredict'] = {'vcf_files':'', 'ensembl_release':'75',
6363
'selection_method':'promiscuity'}
6464

6565
def write_default_config():

epitopepredict/neo.py

Lines changed: 39 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def setup(self):
3535

3636
if check_imports() == False:
3737
return
38-
check_ensembl()
38+
#check_ensembl()
3939
pd.set_option('display.width', 120)
4040
base.iedbmhc1path = self.iedbmhc1_path
4141
base.iedbmhc2path = self.iedbmhc2_path
@@ -108,7 +108,7 @@ def run(self):
108108
variants = load_variants(vcf_file=infile)
109109
labels[f]['variants'] = len(variants)
110110
print ('getting variant effects')
111-
effects = get_variant_effects(variants, self.verbose)
111+
effects = get_variants_effects(variants, self.verbose)
112112
#serialize variant effects
113113
effects_to_pickle(effects, eff_obj)
114114
else:
@@ -360,8 +360,8 @@ def peptides_from_effect(eff, length=11, peptides=True, verbose=False):
360360
wt = orig[st:end]
361361
else:
362362
wt = None
363-
if verbose == True:
364-
print (type(eff), len(orig), len(mut), vloc, st, end, len(mutpep))
363+
#if verbose == True:
364+
# print (type(eff), len(orig), len(mut), vloc, st, end, len(mutpep))
365365
if len(mutpep)<length:
366366
if verbose == True:
367367
print ('peptide length too small')
@@ -562,26 +562,30 @@ def make_blastdb(url, name=None, filename=None, overwrite=False):
562562

563563
def make_human_blastdb():
    """Human proteome blastdb.

    Passes the Ensembl release-87 GRCh38 peptide fasta url to make_blastdb
    with the db name 'GRCh38'.

    Returns:
        the value returned by make_blastdb
    """

    url = 'ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz'
    filename = 'Homo_sapiens.GRCh38.pep.all.fa.gz'
    blastdb = make_blastdb(url, name='GRCh38', filename=filename)
    return blastdb
569570

570571
def make_virus_blastdb():
571572
"""Human virus blastdb"""
573+
572574
url = 'http://www.uniprot.org/uniprot/?sort=score&desc=&compress=no&query=taxonomy:%22Viruses%20[10239]%22%20\
573575
keyword:%22Reference%20proteome%20[KW-1185]%22%20host:%22Homo%20sapiens%20(Human)%20[9606]%22&fil=&force=no&preview=true&format=fasta'
574576
filename = 'uniprot_human_virus_proteome.fa.gz'
575577
blastdb = make_blastdb(url, name='human_virus', filename=filename)
576578
return blastdb
577579

578580
def self_matches(df, **kwargs):
    """Find matches for df in the human proteome blastdb.

    Args:
        df: input passed straight to find_matches
        kwargs: extra keyword arguments forwarded to find_matches
    Returns:
        the find_matches result with the 'sseq' column renamed to
        'self_match' and 'mismatch' renamed to 'self_mismatches'
    """

    blastdb = make_human_blastdb()
    x = find_matches(df, blastdb, **kwargs)
    x = x.rename(columns={'sseq':'self_match','mismatch':'self_mismatches'})
    return x
583586

584587
def virus_matches(df, **kwargs):
588+
585589
blastdb = make_virus_blastdb()
586590
x = find_matches(df, blastdb, **kwargs)
587591
if 'sseq' in x.columns:
@@ -642,12 +646,14 @@ def check_mm(x):
642646
return x
643647

644648
def wt_similarity(x, matrix='blosum62'):
    """Similarity score between the peptide and wt fields of x.

    Args:
        x: object exposing 'peptide' and 'wt' attributes
           (presumably a dataframe row - confirm against callers)
        matrix: name of the matrix loaded with tepitope.get_matrix
    Returns:
        the value of tepitope.similarity_score for the two sequences
    """

    x1 = x.peptide
    x2 = x.wt
    matrix = tepitope.get_matrix(matrix)
    return tepitope.similarity_score(matrix,x1,x2)
649654

650655
def self_similarity(x, matrix='blosum62'):
656+
651657
if x.self_match is None:
652658
return
653659
x1 = x.peptide
@@ -656,6 +662,7 @@ def self_similarity(x, matrix='blosum62'):
656662
return tepitope.similarity_score(matrix,x1,x2)
657663

658664
def virus_similarity(x, matrix='blosum62'):
665+
659666
if x.virus_match is None:
660667
return
661668
x1 = x.peptide
@@ -676,6 +683,7 @@ def anchor_mutated(x):
676683

677684
def summary_plots(df):
678685
"""summary plots for testing results"""
686+
679687
f,axs=plt.subplots(2,2,figsize=(10,10))
680688
axs=axs.flat
681689
g = df.groupby(['name']).size().sort_values(ascending=False)[:20]
@@ -697,42 +705,38 @@ def show_predictors():
697705
def check_imports():
    """Check that varcode can be imported.

    Returns:
        True if the import works, otherwise False after printing the
        error and an install hint.
    """

    try:
        import varcode
    except Exception as e:
        #kept broad on purpose so that whatever stopped the import is
        #printed for the user instead of being raised
        print (e)
        print ('varcode required. please run pip install varcode')
        return False
    return True
704713

705-
def check_snap():
706-
"""Check if inside a snap"""
707-
708-
if 'SNAP_COMMON' in os.environ:
709-
return True
710-
return False
711-
712714
def fetch_ensembl_release(path=None, release='75'):
    """Get pyensembl genome files.

    Args:
        path: cache location chosen by the caller, only used in the
              printed message; None to report the genome object's own
              cache_directory_path
        release: ensembl release passed to EnsemblRelease
    """

    from pyensembl import EnsemblRelease
    #this call should download the files
    genome = EnsemblRelease(release, species='human')
    genome.download(overwrite=False)
    genome.index(overwrite=False)
    #report the location without overwriting the attribute on the genome
    #object, which previously could be replaced with None
    cache_path = path if path is not None else genome.cache_directory_path
    print ('pyensembl genome files cached in %s' %cache_path)
    return
724726

725-
def check_ensembl(release='75'):
    """Check pyensembl ref genome cached. Needed for running in snap.

    Args:
        release: ensembl release passed on to fetch_ensembl_release
    """

    cache_dir = None
    #when running inside a snap package the cache is placed under
    #SNAP_USER_COMMON and pyensembl is pointed at it through
    #PYENSEMBL_CACHE_DIR; otherwise cache_dir stays None
    if base.check_snap():
        cache_dir = os.path.join(os.environ['SNAP_USER_COMMON'], '.cache')
        os.environ['PYENSEMBL_CACHE_DIR'] = cache_dir
    print ('checking for ref human genome')
    fetch_ensembl_release(cache_dir, release)
    return
737741

738742
def run_vep(vcf_file, out_format='vcf', assembly='GRCh38', cpus=4, path=None):
@@ -760,6 +764,7 @@ def print_help():
760764
print ("""use -h to get options""")
761765

762766
def plot_variant_summary(data):
767+
763768
from bokeh.plotting import figure
764769
from bokeh.charts import Donut
765770
d = Donut(df, label=['abbr', 'medal'], values='medal_count',
@@ -775,12 +780,22 @@ def test_run():
775780
options['base']['predictors'] = 'netmhcpan' #'mhcflurry'
776781
options['base']['mhc1_alleles'] = 'HLA-A*02:01'
777782
options['base']['path'] = 'neo_test'
783+
options['base']['overwrite'] = True
778784
#options['base']['mhc2_length'] = 11
779785
#options['base']['verbose'] = True
780786
#options['base']['cpus'] = 4
781787
options['neopredict']['vcf_files'] = os.path.join(path, 'testing','input.vcf')
782788
options = config.check_options(options)
783789
#print (options)
784790
W = NeoEpitopeWorkFlow(options)
791+
check_ensembl(release='75')
785792
st = W.setup()
793+
#check_ensembl()
786794
W.run()
795+
796+
def varcode_test():
    """Load testing/input.vcf from the module folder and get the
    variant effects for it."""

    path = os.path.dirname(os.path.abspath(__file__))
    infile = os.path.join(path, 'testing','input.vcf')
    variants = load_variants(vcf_file=infile)
    get_variants_effects(variants)
    return

epitopepredict/peptutils.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ def net_charge(seq):
143143
def compare_anchor_positions(x1, x2):
144144
"""Check if anchor positions in 9-mers are mutated"""
145145

146+
if x1 is None or x2 is None:
147+
return 0
146148
p1 = list(get_fragments(x1, length=9).peptide)
147149
p2 = list(get_fragments(x2, length=9).peptide)
148150
#is mutation in anchor residue

snap/snapcraft.yaml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,21 @@ description: |
88
each method can then be processed and visualized in a consistent manner.
99
1010
grade: stable
11-
confinement: devmode
11+
confinement: strict
1212
icon: gui/icon.png
1313

1414
apps:
1515
epitopepredict:
1616
command: bin/epitopepredict
1717
plugs: [home,network-bind]
18-
18+
environment:
19+
LD_LIBRARY_PATH: $SNAP/usr/lib/ncbi-blast+
1920
parts:
2021
epitopepredict:
2122
plugin: python
2223
python-version: python3
2324
source: ../
2425
python-packages:
25-
[mhcflurry,gtfparse==0.0.6,pyensembl==1.1.0,varcode==0.5.15]
26+
[mhcflurry,gtfparse==1.2,pyensembl==1.7.3,varcode==0.8.0]
2627
stage-packages:
27-
[python-setuptools,tcsh,gawk,ncbi-blast+,bowtie2]
28+
[tcsh,gawk,ncbi-blast+]

0 commit comments

Comments
 (0)