Last updated: 2017-12-10
Code version: d515b4d
library(mashr)
Loading required package: ashr
library(corrplot)
corrplot 0.84 loaded
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(stringr)
source('~/Documents/GitHub/mashr-stephens/R/likelihoods_scaleddata.R')
source('~/Documents/GitHub/mashr-stephens/R/RcppExports.R')
source('~/Documents/GitHub/mashr-stephens/R/set_data.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior_common_cov.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior_lowmem.R')
Let mash_posterior
function returns posterior weights as well:
mash_compute_posterior_matrices_weights = function(g, data, pi_thresh = 1e-10, algorithm.version = c("Rcpp", "R"), A=NULL ){
if (!is.null(A) && algorithm.version=='Rcpp'){
stop("FIXME: not implemented")
}
if(class(g)=="mash"){
alpha = g$alpha
g = g$fitted_g
if(alpha != data$alpha){
stop('The alpha in data is not the one used to compute the mash model.')
}
}
else{
message('Warning: Please make sure the alpha in data is consistent with the `alpha` used to compute the fitted_g.')
}
xUlist = expand_cov(g$Ulist,g$grid,g$usepointmass)
lm_res = calc_relative_lik_matrix(data, xUlist)
which.comp = (g$pi > pi_thresh)
posterior_weights = compute_posterior_weights(g$pi[which.comp], lm_res$lik_matrix[,which.comp])
posterior_matrices = compute_posterior_matrices(data, xUlist[which.comp],
posterior_weights,
algorithm.version, A=A)
if ((!all(data$Shat_alpha == 1)) && (algorithm.version=='Rcpp')) {
message("FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE")
## Recover the scale of posterior(Bhat)
posterior_matrices$PosteriorMean = posterior_matrices$PosteriorMean * data$Shat_alpha
posterior_matrices$PosteriorSD = posterior_matrices$PosteriorSD * data$Shat_alpha
}
return(list(posterior_weights = posterior_weights,
posterior_matrices = posterior_matrices))
}
mash
model based on data
\(\rightarrow\) estimated weights \(\pi\)data
using the mash
model \(\rightarrow\) posterior weights for the covariance structuresmash
model.mash
model:
set.seed(1)
simdata.equal = simple_sims(500,5,0.5)
# set mash data
TestdataZ.equal = set_mash_data(simdata.equal$Bhat, simdata.equal$Shat, alpha=1)
# center
TestdataZ.equal.center = set_mash_data(apply(as.matrix(TestdataZ.equal$Bhat), 2, function(x) x - mean(x)))
# canonical cov
U.c.equal = cov_canonical(TestdataZ.equal)
# data_driven
m.1by1.Z.equal = mash_1by1(TestdataZ.equal, alpha=1)
strong.Z.equal = get_significant_results(m.1by1.Z.equal,0.05)
U.pca.Z.equal = cov_pca(TestdataZ.equal.center,5,strong.Z.equal)
U.ed.Z.equal = cov_ed(TestdataZ.equal, U.pca.Z.equal, strong.Z.equal)
U.m.Z.equal = mash(TestdataZ.equal, c(U.c.equal, U.ed.Z.equal))
- Computing 2000 x 257 likelihood matrix.
- Likelihood calculations took 0.15 seconds.
- Fitting model with 257 mixture components.
- Model fitting took 0.61 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.equal), las = 2, cex.names = 0.7, main='EZ')
Posterior:
Post = mash_compute_posterior_matrices_weights(U.m.Z.equal, TestdataZ.equal)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
U.m.Z.equal$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
For every sample, the posterior weights for the covariance structures:
Weight = matrix(0,nrow=nrow(U.m.Z.equal$result$PosteriorMean),17)
Weight[,1] = posterior_weights[,1]
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_1"))
Weight[,2] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_2"))
Weight[,3] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_3"))
Weight[,4] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_4"))
Weight[,5] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_5"))
Weight[,6] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_tPCA"))
Weight[,7] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "identity"))
Weight[,8] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_1"))
Weight[,9] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_2"))
Weight[,10] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_3"))
Weight[,11] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_4"))
Weight[,12] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_5"))
Weight[,13] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "equal_effects"))
Weight[,14] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_1"))
Weight[,15] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_2"))
Weight[,16] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_3"))
Weight[,17] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
colnames(Weight) = c('null', 'ED_PCA_1','ED_PCA_2','ED_PCA_3','ED_PCA_4','ED_PCA_5','ED_tPCA',
'identity','condition_1', 'condition_2','condition_3','condition_4','condition_5',
'equal_effects', 'simple_het_1', 'simple_het_2', 'simple_het_3')
row.names(Weight) = row.names(U.m.Z.equal$result$PosteriorMean)
Identify the covariance structure with the max posterior weight for each sample:
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The posterior weights for the covariance structures have similar pattern to the weights in mash
model.
set.seed(1)
simdata.diff = simple_sims(500, 5, rep(c(0.5,0.4,5,1,1), 400))
simdata.diff$z = simdata.diff$Bhat/simdata.diff$Shat
# set mash data
TestdataBeta.diff = set_mash_data(Bhat=simdata.diff$Bhat,
Shat=simdata.diff$Shat,
alpha = 0)
# Create covariance matrices
# center
TestdataZ.diff.center = set_mash_data(Bhat = apply(as.matrix(simdata.diff$z), 2, function(x) x - mean(x)), alpha=0)
# canonical cov
U.c.diff = cov_canonical(TestdataZ.diff.center)
# data_driven
m.1by1.Z.diff = mash_1by1(TestdataBeta.diff, alpha=1)
strong.Z.diff = get_significant_results(m.1by1.Z.diff,0.05)
U.pca.Z.diff = cov_pca(TestdataZ.diff.center,5,strong.Z.diff)
U.ed.beta.diff = cov_ed(TestdataBeta.diff, U.pca.Z.diff, strong.Z.diff)
U.m.beta.diff = mash(TestdataBeta.diff, c(U.c.diff, U.ed.beta.diff))
- Computing 2000 x 337 likelihood matrix.
- Likelihood calculations took 0.60 seconds.
- Fitting model with 337 mixture components.
- Model fitting took 0.80 seconds.
- Computing posterior matrices.
- Computation allocated took 0.09 seconds.
barplot(get_estimated_pi(U.m.beta.diff), las = 2, cex.names = 0.7, main='EE')
Post = mash_compute_posterior_matrices_weights(U.m.beta.diff, TestdataBeta.diff)
U.m.beta.diff$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The posterior weights for the covariance structures have similar pattern to the weights in mash
model.
mash
model based on null data
\(\rightarrow\) estimated weights \(\pi\)null data
using the mash
model \(\rightarrow\) posterior weights for the covariance structures; Obtain posterior mean for max data
using the mash
model \(\rightarrow\) posterior weights for the covariance structuresmash
model.data = readRDS('~/Documents/GitHub/mash-application-immune/data/ImmuneQTLSummary.4MASH.rds')
data$max$se = data$max$beta/data$max$z
data$null$se = data$null$beta / data$null$z
V = cor(data$null$z[which(apply(abs(data$null$z),1, max) < 2),])
mash_data = set_mash_data(Bhat = data$null$beta,
Shat = data$null$se,
alpha=1,
V = V)
resEZ = readRDS('~/Documents/GitHub/mash-application-immune/output/ImmuneEZ.V1.center.mash_model.K10.P5.rds')
Post = mash_compute_posterior_matrices_weights(resEZ, mash_data)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
resEZ$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
barplot(get_estimated_pi(resEZ), las = 2, cex.names = 0.7)
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
Da_summary
# A tibble: 10 x 2
Freq Total
<int> <int>
1 1 187216
2 4 20
3 7 193
4 8 5698
5 9 6
6 15 35
7 21 2
8 23 126
9 25 1
10 26 4
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The number of genes with high posterior weights on equal_effects
are small.
common = Reduce(intersect, list(get_significant_results(resEZ, conditions=1),
get_significant_results(resEZ, conditions=2),
get_significant_results(resEZ, conditions=3),
get_significant_results(resEZ, conditions=4),
get_significant_results(resEZ, conditions=5),
get_significant_results(resEZ, conditions=6),
get_significant_results(resEZ, conditions=7)))
PM.sign = sign(resEZ$result$PosteriorMean[common,])
all(PM.sign - PM.sign[,1] == 0)
[1] TRUE
There is no qualitative interaction cases in the null set.
mash_data = set_mash_data(Bhat = data$max$beta,
Shat = data$max$se,
alpha=1,
V = V)
resEZ = readRDS('~/Documents/GitHub/mash-application-immune/output/ImmuneEZ.V1.center.mash_model.K10.P5.rds')
Post = mash_compute_posterior_matrices_weights(resEZ, mash_data)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
resEZ$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
Da_summary
# A tibble: 11 x 2
Freq Total
<int> <int>
1 1 12003
2 4 90
3 7 1470
4 8 6695
5 9 24
6 12 11
7 15 640
8 21 52
9 23 459
10 25 18
11 26 23
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
There are some genes with high posterior weights on ED_tFlash
and identity
.
Posterior weights for the 4 genes have effects in different directions in different conditions, among the eQTLs that significant among all treatments:
common = Reduce(intersect, list(get_significant_results(resEZ, conditions=1),
get_significant_results(resEZ, conditions=2),
get_significant_results(resEZ, conditions=3),
get_significant_results(resEZ, conditions=4),
get_significant_results(resEZ, conditions=5),
get_significant_results(resEZ, conditions=6),
get_significant_results(resEZ, conditions=7)))
par(mfrow=c(2,2))
for(i in c(3052, 4156, 4303, 4726)){
barplot(Weight[common,][i, ], las = 2, cex.names = 0.7, main=row.names(Weight[common])[i])
}
par(mfrow=c(1,1))
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.2.0 dplyr_0.7.4 corrplot_0.84 mashr_0.2-4 ashr_2.1-27
loaded via a namespace (and not attached):
[1] Rcpp_0.12.14 compiler_3.4.3
[3] git2r_0.19.0 plyr_1.8.4
[5] bindr_0.1 iterators_1.0.8
[7] tools_3.4.3 digest_0.6.12
[9] evaluate_0.10.1 tibble_1.3.4
[11] lattice_0.20-35 pkgconfig_2.0.1
[13] rlang_0.1.2 Matrix_1.2-12
[15] foreach_1.4.3 yaml_2.1.14
[17] parallel_3.4.3 mvtnorm_1.0-6
[19] bindrcpp_0.2 knitr_1.17
[21] REBayes_0.85 rprojroot_1.2
[23] grid_3.4.3 glue_1.1.1
[25] R6_2.2.2 rmarkdown_1.7
[27] rmeta_2.16 magrittr_1.5
[29] backports_1.1.1 codetools_0.2-15
[31] htmltools_0.3.6 MASS_7.3-47
[33] assertthat_0.2.0 stringi_1.1.5
[35] Rmosek_8.0.69 pscl_1.5.2
[37] doParallel_1.0.11 truncnorm_1.0-7
[39] SQUAREM_2017.10-1 ExtremeDeconvolution_1.3
This R Markdown site was created with workflowr