Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,027 changes: 1,027 additions & 0 deletions shanson/Abl/Abl_sgill2_pipeline.ipynb

Large diffs are not rendered by default.

796 changes: 796 additions & 0 deletions shanson/Src/Src_sgill2_pipeline-allsims.ipynb

Large diffs are not rendered by default.

1,035 changes: 1,035 additions & 0 deletions shanson/Src/Src_sgill2_pipeline.ipynb

Large diffs are not rendered by default.

761 changes: 761 additions & 0 deletions shanson/T4/T4_Lysozyme_sgill2_pipeline-allsims.ipynb

Large diffs are not rendered by default.

1,066 changes: 1,066 additions & 0 deletions shanson/T4/T4_Lysozyme_sgill2_pipeline.ipynb

Large diffs are not rendered by default.

Binary file modified shanson/mek-10488/pyemma-finding4/pyemma-finding4-mek.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion shanson/mek-10488/pyemma-finding4/pyemma-finding4-mek.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@
################################################################################

plt.clf()
tics = running_tica.get_output()[0]
tics = running_tica.get_output()
tics = np.vstack(tics)

plt.hexbin(tics[:,0], tics[:, 1], bins='log')
plt.title("Dihedral tICA Analysis")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/cbio/jclab/home/hansons/opt/anaconda/lib/python2.7/site-packages/pyEMMA-2.0.1-py2.7-linux-x86_64.egg/pyemma/coordinates/util/stat.py:31: DeprecationWarning: Call to deprecated function hist. Called from pyemma.coordinates.util.stat line 31. Please use pyemma.coordinates.histogram()
def hist(transform, dimensions, nbins):
loading reference topology...
Initializing msmbuilder dihedrals featurizer...
There are 58840 frames total in 45 trajectories.
tICA...
calculate covariances: 85% (521/608) [#################################################################### ] eta 01:28 /2015-10-23 17:50:58,860 coordinates.transform.TICA[0] WARNING Had to skip 87 trajectories for being too short. Their indexes are in self._skipped_trajs.
This is the sum of the first two eigenvalues: 3.60002302104.
These are the input features most strongly correlated with each tIC:
tIC1: DIH: COS(TYR 172 N 2709 - TYR 172 CA 2711 - TYR 172 C 2728 - SER 173 N 2730)
with correlation of 0.850
tIC2: DIH: SIN(PRO 228 C 3569 - LEU 229 N 3571 - LEU 229 CA 3573 - LEU 229 C 3588)
with correlation of 0.846
tIC3: DIH: SIN(ASN 153 C 2428 - SER 154 N 2430 - SER 154 CA 2432 - SER 154 C 2439)
with correlation of -0.532
tIC4: DIH: COS(VAL 13 N 175 - VAL 13 CA 177 - VAL 13 C 189 - VAL 14 N 191)
with correlation of 0.784
getting output of TICA: 100% (608/608) [###############################################################################] eta 00:01 /
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import matplotlib
matplotlib.use('Agg')

import pyemma
import pyemma.coordinates as coor
import pyemma.plots as mplt

import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt

from msmbuilder.featurizer import DihedralFeaturizer

from glob import glob
import os

# Source directory for MEK simulations
source_directory = '/cbio/jclab/projects/fah/fah-data/munged/no-solvent/10488'

################################################################################
# Load reference topology
################################################################################

print ('loading reference topology...')
reference_pdb_filename = 'reference.pdb'
reference_trajectory = os.path.join(source_directory, 'run0-clone0.h5')
traj = md.load(reference_trajectory)
traj[0].save_pdb(reference_pdb_filename)

################################################################################
# Define msmbuilder to pyemma translator
################################################################################

def msmbuilder_to_pyemma(msmbuilder_dih_featurizer,trajectory):
''' accepts an msmbuilder.featurizer.DihedralFeaturizer object + a trajectory (containing the topology
this featurizer will be applied to) and spits out an equivalent PyEMMA featurizer '''

all_indices = []
for dih_type in msmbuilder_dih_featurizer.types:
func = getattr(md, 'compute_%s' % dih_type)
indices,_ = func(trajectory)
all_indices.append(indices)

indices = np.vstack(all_indices)
sincos = msmbuilder_dih_featurizer.sincos

pyemma_feat = coor.featurizer(trajectory.topology)
pyemma_feat.add_dihedrals(indices,cossin=sincos)

return pyemma_feat

################################################################################
# Initialize featurizer
################################################################################

msmb_featurizer = DihedralFeaturizer(types=["phi", "psi", "chi1", "chi2"])

print('Initializing msmbuilder dihedrals featurizer...')
# create a PyEMMA featurizer
featurizer = msmbuilder_to_pyemma(msmb_featurizer,traj)

################################################################################
# Define coordinates source
################################################################################

trajectory_files = glob(os.path.join(source_directory, '*0.h5'))
coordinates_source = coor.source(trajectory_files,featurizer)
print("There are %d frames total in %d trajectories." % (coordinates_source.n_frames_total(), coordinates_source.number_of_trajectories()))

################################################################################
# Do tICA
################################################################################

print('tICA...')
running_tica = coor.tica(lag=1600, dim=4)
coor.pipeline([coordinates_source,running_tica])

################################################################################
# Make eigenvalues plot
################################################################################

plt.clf()
eigenvalues = (running_tica.eigenvalues)**2

sum_eigenvalues = np.sum(eigenvalues[0:2])

print "This is the sum of the first two eigenvalues: %s." % sum_eigenvalues

plt.plot(eigenvalues)
plt.xlim(0,4)
plt.ylim(0,1.2)
plt.annotate('sum first two: %s.' % sum_eigenvalues, xy=(0.25,0.1))
plt.savefig('pyemma-eigenvalues.png')

#############################################################################
# find the input features with the strongest correlation with tics
##############################################################################
feature_descriptors = featurizer.describe()
tica_correlations = running_tica.feature_TIC_correlation
n_tics = tica_correlations.shape[1]
best_indicator_indices = [np.argmax(np.abs(tica_correlations[:,i])) for i in range(n_tics)]
best_indicators = [feature_descriptors[ind] for ind in best_indicator_indices]
best_indicators_corr = [tica_correlations[best_indicator_indices[i],i] for i in range(n_tics)]

print("These are the input features most strongly correlated with each tIC:")
for i in range(n_tics):
print('\ttIC{0}: {1}\n\t\twith correlation of {2:.3f}'.format(i+1,best_indicators[i],best_indicators_corr[i]))

################################################################################
# Make tics plot
################################################################################

plt.clf()
tics = running_tica.get_output()
tics = np.vstack(tics)

plt.hexbin(tics[:,0], tics[:, 1], bins='log')
plt.title("Dihedral tICA Analysis")
plt.xlabel("tic1")
plt.ylabel("tic2")

plt.savefig("pyemma-finding4-msmbuilderdihedrals-mek.png", bbox_inches="tight")