Pipeline for quality Control and auxiliary scripts to run Linkage analysis in families using Merlin software (http://csg.sph.umich.edu/abecasis/Merlin/tour/input_files.html).
awk '{print "chr"$1"\t"$4}' FileReference.bim > File_UpdateID_POS.txt
for chr in (1..22) ; do vcftools --gzvcf TargetFile_chr${chr}.vcf.gz --positions File_UpdateID_POS.txt --recode --stdout | gzip -c > TargetFile_SNPsReference_chr${chr}.vcf.gz ; done
bcftools concat TargetFile_SNPsReference_chr${chr}.vcf.gz | bcftools view --max-alleles 2 -Oz -o TargetFile_ReferenceSNPs_BiallelicOnly.vcf.gz
plink2 --vcf TargetFile_ReferenceSNPs_BiallelicOnly.vcf.gz --make-bed --out TargetFile_ReferenceSNPs_BiallelicOnly
https://github.com/ldgh/Smart-Cleaning
- Remove chr 0
- Remove duplicate data
- Remove missing data
- Infer individual sex
- Remove A|T and C|G variants
- Remove 100% heterozigotes variants
- dbSNPname
- liftOver
for fam in 1 2 3 ; do plink --bfile Data_Autossomic_SmartQC_ReferenceSNPs_BiallelicOnly --keep Family${fam}.txt --make-bed --out Family${fam} ; done
- freqNonMonomorphic = 1/((N*2)*10) #N= Number of family members, 10x to have sure that just remove monomorphic
- freqNonMonomorphic =1/18 = 0.05
plink --bfile Family1 --maf 0.05 --make-bed --out Family1_NoMonomorphic
https://genome.sph.umich.edu/wiki/Regions_of_high_linkage_disequilibrium_(LD)
plink --bfile Family1_NoMonomorphic --exclude SNPs2Remove_LD.txt --make-bed --out Family1_NoMonomorphic_NoLDRegions
for pop in African EastAsia European NativeAmerican SouthAsia ; do plink --bfile Reference --indep-pairwise 100 10 0.1 --out ${pop}_Reference_LD0.1 ; done
for fam in 1 2 3 ; do plink --bfile Family${fam}_NoMonomorphic_NoLDRegions --extract European_Reference_LD0.1.prune.in --geno 0.01 --mind 0.01 --make-bed --out Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur ; done
for chr in (1..22) ; do awk -v chr="$chr" '{if($1=="chr"chr){print $2"\t"$3"\t"$4}}' /MapaGenetico/recomb-hg38/genetic_map_GRCh38_merged.tab > /MapaGenetico/genetic_map_GRCh38_chr${chr}_combined.txt ; done
for chr in (1..22) ; do cat /MapaGenetico/Header.txt /MapaGenetico/genetic_map_GRCh38_chr${chr}_combined.txt > /MapaGenetico/genetic_map_GRCh38_chr${chr}_combined_Header.txt ; done
for fam in 1 2 3 ; do for chr in (1..22) ; do plink --bfile Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur --chr ${chr} --make-bed --out Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr} ; done ; done
for fam in 1 2 3 ; do for chr in (1..22) ; do plink --bfile Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr} --cm-map /MapaGenetico/genetic_map_GRCh38_chr${chr}_combined_Header.txt ${chr} --make-bed --out Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map ; done ; done
for fam in 1 2 3 ; do for chr in {1..22} ; do plink --bfile Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map --recode --out Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map ; done ; done
for fam in 1 2 3 ; do for chr in {1..22} ; do awk '{print "M""\t"$2}' Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.map > Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.dat ; done ; done
for fam in 1 2 3 ; do for chr in {1..22} ; do cat Header.dat Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.dat > Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map_H.dat ; done ; done
for fam in 1 2 3 ; do for chr in {1..22} ; do awk '{print $1"\t"$2"\t"$3}' Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.map > Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map_merlin.map ; done ; done
Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.ped
for fam in 1 2 3 ; do for chr in 22 ; do merlin-1.1.2/executables/pedstats -d Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map_H.dat -p Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.ped ; done ; done
Linkage analysis tests for co-segregation of a chromosomal region and a trait locus of interest. In parametric linkage analysis, a specific disease model is used to describe segregation of the trait locus.
https://csg.sph.umich.edu/abecasis/merlin/tour/parametric.html
for fam in 1 2 3 ; do for chr in {1..22} ; do merlin-1.1.2/executables/merlin -d Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map_H.dat -p Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map.ped -m Family${fam}_NoMonomorphic_NoLDRegions_LD0.1_GenoMind_Eur_chr${chr}_map_merlin.map --model parametric.model --information --pdf --prefix Family${fam}_chr${chr}_ParametricModel_LDEUR ; done ; done