A R package MMGS is developed by Mingjia Zhu. You can use this packages for Genomic selection across different envs whithin different model, such as rrBLUP, bayesian, LightGBM and so on.
A devlopment R packages used for GS within diff envs.
You can install the package from CRAN using the following command: From Github:
devtools::install_github("Ryougi-yukiro/MMGS")Provide examples of how to use your package. Include code snippets and brief explanations to demonstrate the key features and functionalities. You can also provide links to additional resources or documentation.
#Load the required packages
library("MMGS")
library("dplyr")# used for data reshape and melt
#Load the input test data
data(trait)
data(geno)
data(PTT_PTR)
data(env_info)#if you need estiamte H2 plz use asreml or VCA package
#example code as follow:
trait$Loc <- as.factor(trait$env_code)
trait$GDD <- as.numeric(as.character(trait$PH))
trait$Entry <- gsub("E", "", trait$line_code)
Loc=as.character(trait$Loc);
Entry=as.character(trait$Entry);
Rep=as.character(trait$pop_code);
y=trait$GDD;
mydataframe=data.frame(y,Loc,Entry,Rep);
fit_r <-remlMM(y ~ (Loc)+(Entry)+(Loc/Rep)+(Loc*Entry), mydataframe)
GxE_Var=fit_r$aov.tab[5,2];
G_Var=fit_r$aov.tab[3,2];
Error_Var=fit_r$aov.tab[6,2];
fit_a <-anovaMM(y ~ Loc+Loc/Rep+Entry+Loc*Entry, mydataframe)
fit_a
EN=unique(Loc);
GenoVarInd=numeric();
for(i in 1:7)
{
data1=mydataframe[mydataframe$Loc==EN[1],];
data1=data1[which(!(is.na(data1$y))),];
fit1 <-remlMM(y ~ (Entry)+(Rep), data1)
print(fit1)
#Variance component for genotypes###
GenoVar=fit1$aov.tab[2,2];
GenoVarInd=c(GenoVarInd,GenoVar)
}
#V
V=sum((sqrt(GenoVarInd)-mean(sqrt(GenoVarInd)))^2)/(length(EN)-1)
V.percent=V/GxE_Var;
V
V.percent
#L
L=GxE_Var-V;
L.percent=L/GxE_Var;
L
L.percent
#Line/Entry mean heritability
h2=G_Var/(G_Var+GxE_Var/length(EN)+Error_Var/(length(EN)*2));
h2
#Pooled genetic correlation
rg=G_Var/(G_Var+L)
rgenv_trait<-env_trait_calculate(data=trait,trait="FTgdd",env="env_code")
LbyE<-LbyE_calculate(data=trait,trait="FTgdd",env="env_code",line="line_code")
#envs similarity, plot by ggplot2 like heatmap
LbyE_corrplot(LbyE=LbyE,cor_type="phetamp",color=c("blue","white","red"))
#Get lines mean and Q25, Q75 within each envs
etl<-LbyE_Reshape(data=env_trait,LbyE=LbyE,env="env_code")
etl_plotter(data=etl,trait=env_trait)
#Get Best Days window and Env Paras
Paras <- c('DL', 'GDD', 'PTT', 'PTR', 'PTS');
#p dap_x dap_y searching_daps according to your data
pop_cor<-Exhaustive_search(data=env_trait, env_paras=PTT_PTR,
searching_daps=80,
p=1, dap_x=80,dap_y=80,LOO=0,Paras=Paras)
envMeanPara<-EPM(data=env_trait,env_paras=PTT_PTR,
max_d1=18,max_d2=43)Users can customize the model they need, the function uses the norm reaction by default, the given environment parameters can be obtained from the previous results , fold number represents the number of folds, reshuffle represents the number of repetitions. In RM.G mode, the available models are rrBLUP, LASSO,EN,RR,BA,BB,BC,BL,BRR,RKHS,MKRKHS,SVM,RF and LightGBM.
#Check pheno
pheno<-LbyE[which(as.character(LbyE$line_code)%in%
c("line_code",as.character(geno$line_code))),]
out<-MMGP(pheno=pheno,geno=geno,env=env_info,
para=envMeanPara,Para_Name="PTT",model="rrBLUP",
depend="Norm",fold=2,reshuffle=5,methods="RM.G",
ms1=2,ms2=2)
out2<-MMPrdM(pheno=pheno,geno=geno,
env=env_info,para=envMeanPara,
model="rrBLUPJ",depend="PEI",
Para_Name="PTT",reshuffle=2)
#result
#> mean(out[[3]])
#[1] 0.8690751
#> apply(out[[2]],2,mean)
# PR12 IA14 PR11 IA13 PR14S KS11 KS12
#0.5638706 0.3675908 0.5340269 0.4592765 0.4741423 0.5990176 0.6236456
#head(out[[1]])
# obs pre col para
#1 1409.773 1471.149 #FF0000 PR12
#2 1502.535 1515.962 #FF0000 PR12
#> mean(out2[[3]])
#[1] 0.9494913
#> apply(out2[[2]],2,mean)
# PR12 IA14 PR11 IA13 PR14S KS11 KS12
#0.6996221 0.6543900 0.7063727 0.8053814 0.7997450 0.8963921 0.9200798result<-line_trait_mean(data=trait,trait="FTgdd",mean=env_trait,LbyE=LbyE,row=2)
MSE<-result[[1]]
ltm<-result[[2]]
Reg<-Reg(LbyE=LbyE,env_trait=env_trait)See full documentation from original repository
export(EPM) export(Eh_plot) export(Exhaustive_search) export(LbyE_Reshape) export(LbyE_calculate) export(LbyE_corrplot) export(MMGP) export(MMPrdM) export(Mean_trait_plot) export(Reg) export(Reg_plot) export(eMP_plot) export(env_trait_calculate) export(etl_plotter) export(line_trait_mean) export(mse_plot)
ggGSE is a collection of tools for cross-environmental genome-wide selection prediction that integrates most genome-wide prediction models, both parametric and non-parametric. You can input your own collected data against sample data and get the results you want directly through the built-in functions of the toolkit, which requires no additional statistical knowledge or coding skills and is somewhat user-friendly because it saves users from having to search for various tools and apply them to cross-environmental prediction.
=======
A R package MMGS is developed by Mingjia Zhu. You can use this packages for Genomic selection across different envs whithin different model, such as rrBLUP, bayesian, LightGBM and so on.
A devlopment R packages used for GS within diff envs.
You can install the package from CRAN using the following command: From Github:
devtools::install_github("Ryougi-yukiro/MMGS")Provide examples of how to use your package. Include code snippets and brief explanations to demonstrate the key features and functionalities. You can also provide links to additional resources or documentation.
#Load the required packages
library("MMGS")
library("dplyr")# used for data reshape and melt
#Load the input data
trait<-read.table(file="Trait_records.txt",header=T)
env_info<-read.table(file="Env_meta_table.txt",header=T)
PTT_PTR <- read.table(file="7Envs_envParas_DAP122.txt", header = T , sep = "\t")
geno <- read.table(file="Genotype.txt", header = T , sep = "\t")h2<-h2_rrBLUP(trait=trait,geno=geno,envs="env_code")
#if you need estiamte H2 plz use asreml or VCA package
#example code as follow:
trait$Loc <- as.factor(trait$env_code)
trait$GDD <- as.numeric(as.character(trait$PH))
trait$Entry <- gsub("E", "", trait$line_code)
Loc=as.character(trait$Loc);
Entry=as.character(trait$Entry);
Rep=as.character(trait$pop_code);
y=trait$GDD;
mydataframe=data.frame(y,Loc,Entry,Rep);
fit_r <-remlMM(y ~ (Loc)+(Entry)+(Loc/Rep)+(Loc*Entry), mydataframe)
GxE_Var=fit_r$aov.tab[5,2];
G_Var=fit_r$aov.tab[3,2];
Error_Var=fit_r$aov.tab[6,2];
fit_a <-anovaMM(y ~ Loc+Loc/Rep+Entry+Loc*Entry, mydataframe)
fit_a
EN=unique(Loc);
GenoVarInd=numeric();
for(i in 1:7)
{
data1=mydataframe[mydataframe$Loc==EN[1],];
data1=data1[which(!(is.na(data1$y))),];
fit1 <-remlMM(y ~ (Entry)+(Rep), data1)
print(fit1)
#Variance component for genotypes###
GenoVar=fit1$aov.tab[2,2];
GenoVarInd=c(GenoVarInd,GenoVar)
}
#V
V=sum((sqrt(GenoVarInd)-mean(sqrt(GenoVarInd)))^2)/(length(EN)-1)
V.percent=V/GxE_Var;
V
V.percent
#L
L=GxE_Var-V;
L.percent=L/GxE_Var;
L
L.percent
#Line/Entry mean heritability
h2=G_Var/(G_Var+GxE_Var/length(EN)+Error_Var/(length(EN)*2));
h2
#Pooled genetic correlation
rg=G_Var/(G_Var+L)
rgenv_trait<-env_trait_calculate(data=trait,trait="FTgdd",env="env_code")
LbyE<-LbyE_calculate(data=trait,trait="FTgdd",env="env_code",line="line_code")
#envs similarity, plot by ggplot2 like heatmap
LbyE_corrplot(LbyE=LbyE,cor_type="phetamp",color=c("blue","white","red"))
#Get lines mean and Q25, Q75 within each envs
etl<-etl_calculate(data=env_info,trait="FTgdd",env="env_code",bycol="lat")
etl_plotter(data=etl,mean=env_trait) # you could design your plotter use more paras
#Get Best Days window and Env Paras
Paras <- c('DL', 'GDD', 'PTT', 'PTR', 'PTS');
#p dap_x dap_y searching_daps according to your data
pop_cor<-Exhaustive_search(data=env_trait, env_paras=PTT_PTR, searching_daps=80,
p=1, dap_x=80,dap_y=80,LOO=0,Paras=Paras)
#Result Visualization
Exhaustive_plotter(data=pop_cor,dap_x=80, dap_y=80,p=1)
#Get Envs mean within env paras
envMeanPara<-envMeanPara(data=env_trait, env_paras=PTT_PTR, maxR_dap1=18,maxR_dap2=43, Paras=Paras)Users can customize the model they need, the function uses the norm reaction by default, the given environment parameters can be obtained from the previous results , fold number represents the number of folds, reshuffle represents the number of repetitions. In RM.G mode, the available models are rrBLUP, LASSO,EN,RR,BA,BB,BC,BL,BRR,RKHS,MKRKHS,SVM,RF and LightGBM.
#Check pheno
pheno<-LbyE[which(as.character(LbyE$line_code)%in%c("line_code",as.character(geno$line_code))),];
#CV
out<-GE_CV(pheno=pheno, geno=geno, env=env_info,
para=envMeanPara, Para_Name="PTT", depend="norm",
model="rrBLUP", fold=2, reshuffle=5, methods="RM.G")
#result
#> mean(out[[3]])
#[1] 0.8728506
#> apply(out[[2]],2,mean)
# PR12 IA14 PR11 IA13 PR14S KS11 KS12
#0.5418663 0.3868576 0.5381628 0.4759335 0.4871427 0.6219213 0.6380658
#head(out[[1]])
# obs pre col para
#1 1595.988 1588.782 #FF0000 PR12
#2 1512.918 1576.437 #FF0000 PR12result<-line_trait_mean(data=trait,trait="FTgdd",mean=env_trait,LbyE=LbyE,row=2)
MSE<-result[[1]]
ltm<-result[[2]]
mse_plotter(MSE)
Reg<-Reg(LbyE=LbyE,env_trait=env_trait)
Reg_plotter(Reg=Reg)
Mean_trait_plot(Reg,MSE)
Slope_Intercept<-Slope_Intercept(data=filtered_trait,input=env_trait, env_paras=PTT_PTR,
Para_Name="PTS",line="line_code",trait="PH",filter=5,
maxR_dap1=22, maxR_dap2=35,rounds=4)
prdM <- LOOCV(maxR_dap1=22,maxR_dap2=35,data=filtered_trait,input=env_trait,env_paras=PTT_PTR,
Para_Name="PTS",trait="PH",line="line_code",p=1,filter=4)
prdM_plotter(prdM=prdM,data=envMeanPara,trait="PH",Para_Name="PTS");
envMeanPara_plotter(envMeanPara)See full documentation from original repository
- env_trait_calculate
- envMeanPara
- envMeanPara_plotter
- etl_calculate
- etl_plotter
- Exhaustive_plotter
- Exhaustive_search
- GE_CV
- h2_rrBLUP
- LbyE_calculate
- LbyE_corrplot
- line_trait_mean
- LOOCV
- ltm_plotter
- Mean_trait_plot
- mse_plotter
- prdM_plotter
- Reg
- Reg_plotter
- Slope_Intercept
ggGSE is a collection of tools for cross-environmental genome-wide selection prediction that integrates most genome-wide prediction models, both parametric and non-parametric. You can input your own collected data against sample data and get the results you want directly through the built-in functions of the toolkit, which requires no additional statistical knowledge or coding skills and is somewhat user-friendly because it saves users from having to search for various tools and apply them to cross-environmental prediction.