Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2017c.
1.0/zoneinfo/America/Chicago'
Last updated: 2017-11-24
Code version: 963d26b
library(mashr)
Loading required package: ashr
set.seed(1)
simdata.equal = simple_sims(500,5,0.5)
# set mash data
TestdataBeta.equal = set_mash_data(simdata.equal$Bhat, simdata.equal$Shat, alpha=0)
TestdataZ.equal = set_mash_data(simdata.equal$Bhat, simdata.equal$Shat, alpha=1)
# center
TestdataZ.equal.center = set_mash_data(apply(as.matrix(TestdataZ.equal$Bhat), 2, function(x) x - mean(x)))
# canonical cov
U.c.equal = cov_canonical(TestdataBeta.equal)
# data_driven
m.1by1.Z.equal = mash_1by1(TestdataBeta.equal, alpha=1)
strong.Z.equal = get_significant_results(m.1by1.Z.equal,0.05)
U.pca.Z.equal = cov_pca(TestdataZ.equal.center,5,strong.Z.equal)
U.ed.beta.equal = cov_ed(TestdataBeta.equal, U.pca.Z.equal, strong.Z.equal)
U.m.beta.equal = mash(TestdataBeta.equal, c(U.c.equal,U.ed.beta.equal))
- Computing 2000 x 257 likelihood matrix.
- Likelihood calculations took 0.15 seconds.
- Fitting model with 257 mixture components.
- Model fitting took 0.58 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
barplot(get_estimated_pi(U.m.beta.equal), las = 2, cex.names = 0.7, main='EE')
U.ed.Z.equal = cov_ed(TestdataZ.equal, U.pca.Z.equal, strong.Z.equal)
U.m.Z.equal = mash(TestdataZ.equal, c(U.c.equal, U.ed.Z.equal))
- Computing 2000 x 257 likelihood matrix.
- Likelihood calculations took 0.06 seconds.
- Fitting model with 257 mixture components.
- Model fitting took 0.48 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.equal), las = 2, cex.names = 0.7, main='EZ')
The estimated weights are similar.
set.seed(1)
simdata.col.diff = simple_sims(500, 5, rep(c(0.1,0.08,0.15,0.12,0.06), each=2000))
vhat = 0
simdata.col.diff$z = simdata.col.diff$Bhat/simdata.col.diff$Shat
if (vhat == 1) {
V = cor(simdata.col.diff$z[which(apply(abs(simdata.col.diff$z),1, max) < 2),])
} else {
V = diag(ncol(simdata.col.diff$z))
}
TestdataBeta.col.diff = set_mash_data(Bhat=simdata.col.diff$Bhat,
Shat=simdata.col.diff$Shat,
V=as.matrix(V),
alpha = 0)
TestdataZ.col.diff = set_mash_data(Bhat=simdata.col.diff$Bhat,
Shat=simdata.col.diff$Shat,
V=as.matrix(V),
alpha = 1)
# center
TestdataZ.col.diff.center = set_mash_data(Bhat = apply(as.matrix(TestdataZ.col.diff$Bhat), 2, function(x) x - mean(x)),
V = as.matrix(V))
# canonical cov
U.c.col.diff = cov_canonical(TestdataBeta.col.diff)
# data_driven
m.1by1.Z.col.diff = mash_1by1(TestdataBeta.col.diff, alpha=1)
strong.Z.col.diff = get_significant_results(m.1by1.Z.col.diff,0.05)
U.pca.Z.col.diff = cov_pca(TestdataZ.col.diff.center,5,strong.Z.col.diff)
U.ed.beta.col.diff = cov_ed(TestdataBeta.col.diff, U.pca.Z.col.diff, strong.Z.col.diff)
U.m.beta.col.diff = mash(TestdataBeta.col.diff, c(U.c.col.diff, U.ed.beta.col.diff))
- Computing 2000 x 353 likelihood matrix.
- Likelihood calculations took 0.06 seconds.
- Fitting model with 353 mixture components.
- Model fitting took 0.46 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
barplot(get_estimated_pi(U.m.beta.col.diff), las = 2, cex.names = 0.7, main='EE')
U.ed.Z.col.diff = cov_ed(TestdataZ.col.diff, U.pca.Z.col.diff, strong.Z.col.diff)
U.m.Z.col.diff = mash(TestdataZ.col.diff, c(U.c.col.diff,U.ed.Z.col.diff))
- Computing 2000 x 353 likelihood matrix.
- Likelihood calculations took 0.07 seconds.
- Fitting model with 353 mixture components.
- Model fitting took 0.43 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.col.diff), las = 2, cex.names = 0.7, main='EZ')
The estimated weights are different.
library('corrplot')
corrplot 0.84 loaded
x <- cov2cor(U.m.Z.col.diff$fitted_g$Ulist[["ED_tPCA"]])
x[x > 1] <- 1
x[x < -1] <- -1
corrplot.mixed(x, tl.pos="d",upper='color',cl.lim=c(0.3,1), upper.col=colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(40),
tl.cex=1.2)
svd.out = svd(U.m.Z.col.diff$fitted_g$Ulist[["ED_tPCA"]])
v = svd.out$v
options(repr.plot.width=10, repr.plot.height=5)
for (j in 1:1)
barplot(v[,j]/v[,j][which.max(abs(v[,j]))], cex.names = 0.7,
las = 2, main = paste0("EigenVector ", j, " for PCA-based covariance matrix"))
Check significant samples under transformation A
A = rbind(c(1,-1,0,0,0))
subset.data = get_significant_results(U.m.beta.col.diff)
simdata.col.diff.subset = simdata.col.diff
simdata.col.diff.subset$Bhat = simdata.col.diff$Bhat[subset.data,]
simdata.col.diff.subset$Shat = simdata.col.diff$Shat[subset.data,]
TestdataBeta.col.diff.subset = set_mash_data(simdata.col.diff.subset$Bhat, simdata.col.diff.subset$Shat, alpha = 0)
TestdataZ.col.diff.subset = set_mash_data(simdata.col.diff.subset$Bhat, simdata.col.diff.subset$Shat, alpha = 1)
# EE
U.m.beta.col.diff.posterior = mash_compute_posterior_matrices(U.m.beta.col.diff, TestdataBeta.col.diff.subset, A=A, algorithm.version = 'R')
U.m.beta.col.diff$result = U.m.beta.col.diff.posterior
# EZ
U.m.Z.col.diff.posterior = mash_compute_posterior_matrices(U.m.Z.col.diff, TestdataZ.col.diff.subset, A=A, algorithm.version = 'R')
U.m.Z.col.diff$result = U.m.Z.col.diff.posterior
length(get_significant_results(U.m.beta.col.diff))
[1] 823
length(get_significant_results(U.m.Z.col.diff))
[1] 1098
Comparing loglikelihood
get_loglik(U.m.beta.col.diff)
[1] -1860.587
get_loglik(U.m.Z.col.diff)
[1] -3349.742
EE model has higher likelihood, since the data is simulated under EE.
set.seed(1)
simdata.samp.diff = simple_sims(500, 5, rep(c(0.5,0.4,5,1,1), 400))
vhat = 0
simdata.samp.diff$z = simdata.samp.diff$Bhat/simdata.samp.diff$Shat
if (vhat == 1) {
V = cor(simdata.samp.diff$z[which(apply(abs(simdata.samp.diff$z),1, max) < 2),])
} else {
V = diag(ncol(simdata.samp.diff$z))
}
TestdataBeta.samp.diff = set_mash_data(Bhat=simdata.samp.diff$Bhat,
Shat=simdata.samp.diff$Shat,
V=as.matrix(V),
alpha = 0)
TestdataZ.samp.diff = set_mash_data(Bhat=simdata.samp.diff$Bhat,
Shat=simdata.samp.diff$Shat,
V=as.matrix(V),
alpha = 1)
# center
TestdataZ.samp.diff.center = set_mash_data(Bhat = apply(as.matrix(TestdataZ.samp.diff$Bhat), 2, function(x) x - mean(x)))
# canonical cov
U.c.samp.diff = cov_canonical(TestdataBeta.samp.diff)
# data_driven from Z
m.1by1.Z.samp.diff = mash_1by1(TestdataBeta.samp.diff, alpha=1)
strong.Z.samp.diff = get_significant_results(m.1by1.Z.samp.diff,0.05)
U.pca.Z.samp.diff = cov_pca(TestdataZ.samp.diff.center,5,strong.Z.samp.diff)
U.ed.beta.samp.diff = cov_ed(TestdataBeta.samp.diff, U.pca.Z.samp.diff, strong.Z.samp.diff)
U.m.beta.samp.diff = mash(TestdataBeta.samp.diff,
c(U.c.samp.diff, U.ed.beta.samp.diff))
- Computing 2000 x 337 likelihood matrix.
- Likelihood calculations took 0.66 seconds.
- Fitting model with 337 mixture components.
- Model fitting took 0.81 seconds.
- Computing posterior matrices.
- Computation allocated took 0.10 seconds.
barplot(get_estimated_pi(U.m.beta.samp.diff), las = 2, cex.names = 0.7, main='EE')
U.ed.Z.samp.diff = cov_ed(TestdataZ.samp.diff, U.pca.Z.samp.diff, strong.Z.samp.diff)
U.m.Z.samp.diff = mash(TestdataZ.samp.diff, c(U.c.samp.diff, U.ed.Z.samp.diff))
- Computing 2000 x 257 likelihood matrix.
- Likelihood calculations took 0.05 seconds.
- Fitting model with 257 mixture components.
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(0.796931310599491,
0.850310036770056, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
- Model fitting took 0.46 seconds.
- Computing posterior matrices.
- Computation allocated took 0.08 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.samp.diff), las = 2, cex.names = 0.7, main='EZ')
The estiamted weights are different if the Shat is not constant.
Comparing loglikelihood
get_loglik(U.m.beta.samp.diff)
[1] -16912.39
get_loglik(U.m.Z.samp.diff)
[1] -17164.99
Again, EE model has higher likelihood, since the data is simulated under EE.
Check significant samples under transformation A
A = rbind(c(1,-1,0,0,0))
subset.data = get_significant_results(U.m.beta.samp.diff)
simdata.samp.diff.subset = simdata.samp.diff
simdata.samp.diff.subset$Bhat = simdata.samp.diff$Bhat[subset.data,]
simdata.samp.diff.subset$Shat = simdata.samp.diff$Shat[subset.data,]
TestdataBeta.samp.diff.subset = set_mash_data(simdata.samp.diff.subset$Bhat, simdata.samp.diff.subset$Shat, alpha = 0)
TestdataZ.samp.diff.subset = set_mash_data(simdata.samp.diff.subset$Bhat, simdata.samp.diff.subset$Shat, alpha = 1)
# EE
U.m.beta.samp.diff.posterior = mash_compute_posterior_matrices(U.m.beta.samp.diff, TestdataBeta.samp.diff, A=A, algorithm.version = 'R')
U.m.beta.samp.diff$result = U.m.beta.samp.diff.posterior
# EZ
U.m.Z.samp.diff.posterior = mash_compute_posterior_matrices(U.m.Z.samp.diff, TestdataZ.samp.diff, A=A, algorithm.version = 'R')
U.m.Z.samp.diff$result = U.m.Z.samp.diff.posterior
length(get_significant_results(U.m.beta.samp.diff))
[1] 133
length(get_significant_results(U.m.Z.samp.diff))
[1] 129
Simulation function
simple_sims_alpha = function(nsamp = 100, ncond = 5, err_sd, alpha){
Balpha.id = matrix(rnorm(nsamp * ncond), nrow = nsamp, ncol = ncond)
b = rnorm(nsamp)
Balpha.all = matrix(rep(b, ncond), nrow = nsamp, ncol = ncond)
Balpha.zero = matrix(0, nrow = nsamp, ncol = ncond)
Balpha.one = Balpha.zero
b2 = rnorm(nsamp)
Balpha.one[, 1] = b2
Balpha = rbind(Balpha.zero, Balpha.id, Balpha.one, Balpha.all)
E.alpha= matrix(rnorm(nrow(Balpha)*nrow(Balpha), sd=err_sd^(1-alpha)), nrow = nrow(Balpha),
ncol = ncol(Balpha), byrow = T)
Balpha.hat = Balpha+E.alpha
Shat = matrix(err_sd, nrow = nrow(Balpha), ncol = ncol(Balpha), byrow=T)
Bhat = Balpha.hat*Shat^(alpha)
B = Balpha*Shat^(alpha)
return(list(B=B,Bhat=Bhat,Shat=Shat))
}
Set mash data
set.seed(1)
simdata.diff = simple_sims_alpha(500, 5, c(0.2,0.3,0.4,0.2,0.3), alpha = 0.8)
vhat = 0
simdata.diff$z = simdata.diff$Bhat/simdata.diff$Shat
if (vhat == 1) {
V = cor(simdata.diff$z[which(apply(abs(simdata.diff$z),1, max) < 2),])
} else {
V = diag(ncol(simdata.diff$z))
}
TestdataBeta.diff = set_mash_data(Bhat=simdata.diff$Bhat,
Shat=simdata.diff$Shat,
V=as.matrix(V),
alpha = 0)
TestdataZ.diff = set_mash_data(Bhat=simdata.diff$Bhat,
Shat=simdata.diff$Shat,
V=as.matrix(V),
alpha = 1)
Create covariance matrices
# center
TestdataZ.diff.center = set_mash_data(Bhat=apply(as.matrix(TestdataZ.diff$Bhat), 2, function(x) x - mean(x)))
# canonical cov
U.c.diff = cov_canonical(TestdataBeta.diff)
# data_driven from Z
m.1by1.Z.diff = mash_1by1(TestdataBeta.diff, alpha=1)
strong.Z.diff = get_significant_results(m.1by1.Z.diff,0.05)
U.pca.Z.diff = cov_pca(TestdataZ.diff.center,5,strong.Z.diff)
U.ed.beta.diff = cov_ed(TestdataBeta.diff, U.pca.Z.diff, strong.Z.diff)
U.m.beta.diff = mash(TestdataBeta.diff, c(U.c.diff, U.ed.beta.diff))
- Computing 2000 x 273 likelihood matrix.
- Likelihood calculations took 0.05 seconds.
- Fitting model with 273 mixture components.
- Model fitting took 0.52 seconds.
- Computing posterior matrices.
- Computation allocated took 0.08 seconds.
barplot(get_estimated_pi(U.m.beta.diff), las = 2, cex.names = 0.7, main='EE')
U.ed.Z.diff = cov_ed(TestdataZ.diff, U.pca.Z.diff, strong.Z.diff)
U.m.Z.diff = mash(TestdataZ.diff, c(U.c.diff, U.ed.Z.diff))
- Computing 2000 x 241 likelihood matrix.
- Likelihood calculations took 0.04 seconds.
- Fitting model with 241 mixture components.
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(1, 1, 0.193843305780807,
0.764185624697885, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
- Model fitting took 0.43 seconds.
- Computing posterior matrices.
- Computation allocated took 0.03 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.diff), las = 2, cex.names = 0.7, main='EZ')
Compare loglikelihod
get_loglik(U.m.beta.diff)
[1] -3953.136
get_loglik(U.m.Z.diff)
[1] -3890.032
The data is simulated using \(\alpha = 0.8\), which is near 1. The EZ model has larger log likelihood.
If the Shat are different for different condition or samples, the estimated weights are different using EE, EZ models. Using the EZ model, the covariance structures we found are about the standardized effects. Using the EE model, the covariance structure are about the raw effects.
The estimated weights are for covariance matrices instead of correlation matrices. If EZ model puts large weight on equal effect cov (\(11’\)), it means that the standardized effects \(S_{j}^{-1} \beta_{j}\) are strongly correlated among conditions. But this means the raw effect \(\beta_{j}\) has cov \(S_{j}11’S_{j}\), which does not proportional to \(11’\) (if diagonal of \(S_{j}\) are not equal). Therefore, in the EE model, it will put large weight on cov \(S_{j}11’S_{j}\), instead of \(11’\). The covariance structure we found are different, but the correlation structure could be same.
sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] corrplot_0.84 mashr_0.2-4 ashr_2.1-27
loaded via a namespace (and not attached):
[1] Rcpp_0.12.13 knitr_1.17
[3] magrittr_1.5 REBayes_0.85
[5] MASS_7.3-47 doParallel_1.0.11
[7] pscl_1.5.2 SQUAREM_2017.10-1
[9] lattice_0.20-35 ExtremeDeconvolution_1.3
[11] foreach_1.4.3 plyr_1.8.4
[13] stringr_1.2.0 tools_3.4.2
[15] parallel_3.4.2 grid_3.4.2
[17] rmeta_2.16 git2r_0.19.0
[19] htmltools_0.3.6 iterators_1.0.8
[21] assertthat_0.2.0 yaml_2.1.14
[23] rprojroot_1.2 digest_0.6.12
[25] Matrix_1.2-11 codetools_0.2-15
[27] evaluate_0.10.1 rmarkdown_1.7
[29] stringi_1.1.5 compiler_3.4.2
[31] Rmosek_8.0.69 backports_1.1.1
[33] mvtnorm_1.0-6 truncnorm_1.0-7
This R Markdown site was created with workflowr