# Environment setup: create the conda environment, fetch GEMprep, and move
# into the directory where the data will be downloaded and processed.
#
# GEMPREP_DIR may be exported beforehand to use a different GEMprep checkout
# (give an absolute path); it defaults to the location this workflow was
# originally run from.
GEMPREP_DIR=${GEMPREP_DIR:-/scratch/leiarar/gem-processing/GEMprep}

# Create and activate the "gemprep" conda environment with the listed packages.
conda create -n gemprep python=3.6 matplotlib mpi4py numpy pandas r scikit-learn seaborn
source activate gemprep

# Clone the GEMprep repository into ./GEMprep under the current directory.
git clone https://github.com/SystemsGenetics/GEMprep

# Change to the working directory. The earlier relative "cd GEMprep" was
# dropped because this cd overrode it immediately. Abort if the directory is
# missing so the downloads below do not land in an unintended location.
cd "$GEMPREP_DIR" || exit 1
# Download the two expression matrices from figshare and decompress them.
# "wget -O" writes each download straight to its descriptive file name, so no
# intermediate numeric file has to be renamed, and a leftover numeric file
# from an earlier run cannot be picked up by mistake. gunzip only runs when
# the corresponding download succeeded.

# Normal kidney data from GTEX.
wget -O kidney-rsem-fpkm-gtex.txt.gz https://ndownloader.figshare.com/files/9150631 \
  && gunzip kidney-rsem-fpkm-gtex.txt.gz

# Kidney tumor data from TCGA.
wget -O kirp-rsem-fpkm-tcga-t.txt.gz https://ndownloader.figshare.com/files/9150640 \
  && gunzip kirp-rsem-fpkm-tcga-t.txt.gz
# Merge the GTEX and TCGA GEMs, then quantile-normalize and log2-transform
# the merged matrix using the GEMprep scripts.
#
# GEMPREP_DIR may be exported beforehand to use a different GEMprep checkout
# (give an absolute path); it defaults to the location this workflow was
# originally run from.
GEMPREP_DIR=${GEMPREP_DIR:-/scratch/leiarar/gem-processing/GEMprep}

# The steps are chained with && so a later step never runs on a missing or
# stale file left behind by a failed earlier step:
#   1. merge the GTEX and TCGA GEMs
#   2. quantile normalization of the merged GEM
#   3. log2 transformation of the quantile-normalized GEM
python "$GEMPREP_DIR/bin/merge.py" kidney-rsem-fpkm-gtex.txt kirp-rsem-fpkm-tcga-t.txt kidney-gtex-kirp.txt \
  && python "$GEMPREP_DIR/bin/normalize.py" kidney-gtex-kirp.txt kidney-gtex-kirp.quantile.txt --quantile \
  && python "$GEMPREP_DIR/bin/normalize.py" kidney-gtex-kirp.quantile.txt kidney-gtex-kirp.quantile.log2.txt --log2
#######################################
# Print one label per data column of a tab-separated GEM file.
# The label is the part of each header field that precedes the first '-'
# or ','; the first header field (the row-name column) is skipped.
# Arguments: $1 - path to the GEM file
# Outputs:   labels on stdout, one per line
#######################################
extract_labels() {
  # tr is used instead of sed 's/\t/\n/g' because \t and \n in sed
  # expressions are GNU extensions; tr handles these escapes portably.
  head -n 1 "$1" \
    | tr '\t' '\n' \
    | awk -F'[-,]' 'NR > 1 { print $1 }'
}

extract_labels kidney-gtex-kirp.quantile.log2.txt > labels.txt

# Keep the first two lines of the final GEM for a quick visual check.
head -n 2 kidney-gtex-kirp.quantile.log2.txt > first_two_lines_gem.txt
# These normalized matrices can now be used for the following:
# 1. Input files for biomarker discovery.
# 2. Construction of gene co-expression networks (GCNs).
# 3. Differential gene expression analysis between normal and tumor samples, i.e. identifying differentially expressed genes (DEGs).
# Leiara Rivera