-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenetic_correlation_traits.R
92 lines (84 loc) · 4.23 KB
/
genetic_correlation_traits.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Lindsay V. Clark
# November 15, 2016
# Feel free to modify and distribute. This R function comes with no warranty.
# Generalized function to estimate genetic correlation between traits measured on the same field
# plots (the same plants).
# from equation 9 of Howe et al. (2000; doi:10.1007/s001220051525)
# see also http://www.knowledgebank.irri.org/ricebreedingcourse/Lesson_8_Correlations_among_traits__implications_for_screening.htm
# formula: Should start with "Response ~" and then include variables to be used as predictors.
# Formatted for lme4. An example would be "Response ~ (1|Genotype) + (1|Loc/Rep) + (1|Genotype:Loc)"
# data: Data frame containing genotype IDs, traits, and any predictor variables.
# traitNames: Names of the columns in data containing traits between which we want to estimate genetic correlation.
# genIDName: Name of column in data containing genotype ID (for estimation of genetic components of
# variance and covariance). In the above example this would be "Genotype".
# subset: Subset of data to analyze. (Numeric, boolean, or character vector indicating rows to keep.)
genCorrTraits <- function(formula, data, traitNames, genIDName, subset = 1:dim(data)[1]){
require(lme4)
nTrait <- length(traitNames)
if(nTrait < 2){
stop("At least two traitNames needed.")
}
if(!all(traitNames %in% names(data))){
stop("All traitNames need to be column names of data.")
}
if(length(genIDName) != 1){
stop("Only one genIDName allowed.")
}
if(!genIDName %in% names(data)){
stop("genIDName not found in column names of data.")
}
data <- data[subset,]
nObs <- dim(data)[1]
# fill in variances along the diagonal only if there is no missing data at all
dovar <- !any(is.na(data[,traitNames]))
# manipulate formula to build new ones for linear models
OriginalTerms <- attr(terms(formula), "term.labels")
for(i in grep("|", OriginalTerms, fixed = TRUE)){ # fix formatting of random effects
OriginalTerms[i] <- paste("(", OriginalTerms[i], ")", sep = "")
}
# get genetic covariance and correlation for pairs of traits
gencov <- gencor <- matrix(NA, nrow = nTrait, ncol = nTrait,
dimnames = list(traitNames, traitNames))
for(tr1 in traitNames[-nTrait]){
for(tr2 in traitNames[(match(tr1, traitNames)+1):nTrait]){
message(paste("Evaluating", tr1, "with", tr2))
# get the genetic variances, just using plots where both traits were measured
obsBothTrait <- !is.na(data[[tr1]]) & !is.na(data[[tr2]])
if(sum(obsBothTrait) == 0) next
if(dovar && !is.na(gencov[tr1, tr1])){
var1 <- gencov[tr1, tr1] # don't recalculate variance if already done
} else {
thismodel <- lme4::lmer(reformulate(termlabels = OriginalTerms, response = tr1), data = data,
subset = obsBothTrait)
thisVC <- as.data.frame(VarCorr(thismodel))
var1 <- thisVC$vcov[match(genIDName, thisVC$grp)]
if(dovar) gencov[tr1, tr1] <- var1 # add variance to diagonal
}
if(dovar && !is.na(gencov[tr2, tr2])){
var2 <- gencov[tr2, tr2]
} else {
thismodel <- lme4::lmer(reformulate(termlabels = OriginalTerms, response = tr2), data = data,
subset = obsBothTrait)
thisVC <- as.data.frame(VarCorr(thismodel))
var2 <- thisVC$vcov[match(genIDName, thisVC$grp)]
if(dovar) gencov[tr2, tr2] <- var2
}
# now add the two variables together and get the genetic variance for the new trait
data$Response <- data[[tr1]] + data[[tr2]]
thismodel <- lme4::lmer(reformulate(termlabels = OriginalTerms, response = "Response"),
data = data)
thisVC <- as.data.frame(VarCorr(thismodel))
var12 <- thisVC$vcov[match(genIDName, thisVC$grp)]
# get the genetic covariance from these three variances
thisGenCov <- (var12 - var1 - var2)/2
gencov[tr1, tr2] <- thisGenCov
gencov[tr2, tr1] <- thisGenCov
# get genetic correlation
thisGenCor <- thisGenCov/sqrt(var1*var2)
gencor[tr1, tr2] <- thisGenCor
gencor[tr2, tr1] <- thisGenCor
}
}
return(list(genetic.covariance = gencov,
genetic.correlation = gencor))
}