Last updated: 2017-12-10
Code version: d515b4d
library(mashr)
Loading required package: ashr
library(corrplot)
corrplot 0.84 loaded
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(stringr)
source('~/Documents/GitHub/mashr-stephens/R/likelihoods_scaleddata.R')
source('~/Documents/GitHub/mashr-stephens/R/RcppExports.R')
source('~/Documents/GitHub/mashr-stephens/R/set_data.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior_common_cov.R')
source('~/Documents/GitHub/mashr-stephens/R/posterior_lowmem.R')
Let mash_posterior
function returns posterior weights as well:
mash_compute_posterior_matrices_weights = function(g, data, pi_thresh = 1e-10, algorithm.version = c("Rcpp", "R"), A=NULL ){
if (!is.null(A) && algorithm.version=='Rcpp'){
stop("FIXME: not implemented")
}
if(class(g)=="mash"){
alpha = g$alpha
g = g$fitted_g
if(alpha != data$alpha){
stop('The alpha in data is not the one used to compute the mash model.')
}
}
else{
message('Warning: Please make sure the alpha in data is consistent with the `alpha` used to compute the fitted_g.')
}
xUlist = expand_cov(g$Ulist,g$grid,g$usepointmass)
lm_res = calc_relative_lik_matrix(data, xUlist)
which.comp = (g$pi > pi_thresh)
posterior_weights = compute_posterior_weights(g$pi[which.comp], lm_res$lik_matrix[,which.comp])
posterior_matrices = compute_posterior_matrices(data, xUlist[which.comp],
posterior_weights,
algorithm.version, A=A)
if ((!all(data$Shat_alpha == 1)) && (algorithm.version=='Rcpp')) {
message("FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE")
## Recover the scale of posterior(Bhat)
posterior_matrices$PosteriorMean = posterior_matrices$PosteriorMean * data$Shat_alpha
posterior_matrices$PosteriorSD = posterior_matrices$PosteriorSD * data$Shat_alpha
}
return(list(posterior_weights = posterior_weights,
posterior_matrices = posterior_matrices))
}
mash
model based on data
→ estimated weights πdata
using the mash
model → posterior weights for the covariance structuresmash
model.mash
model:
set.seed(1)
simdata.equal = simple_sims(500,5,0.5)
# set mash data
TestdataZ.equal = set_mash_data(simdata.equal$Bhat, simdata.equal$Shat, alpha=1)
# center
TestdataZ.equal.center = set_mash_data(apply(as.matrix(TestdataZ.equal$Bhat), 2, function(x) x - mean(x)))
# canonical cov
U.c.equal = cov_canonical(TestdataZ.equal)
# data_driven
m.1by1.Z.equal = mash_1by1(TestdataZ.equal, alpha=1)
strong.Z.equal = get_significant_results(m.1by1.Z.equal,0.05)
U.pca.Z.equal = cov_pca(TestdataZ.equal.center,5,strong.Z.equal)
U.ed.Z.equal = cov_ed(TestdataZ.equal, U.pca.Z.equal, strong.Z.equal)
U.m.Z.equal = mash(TestdataZ.equal, c(U.c.equal, U.ed.Z.equal))
- Computing 2000 x 257 likelihood matrix.
- Likelihood calculations took 0.15 seconds.
- Fitting model with 257 mixture components.
- Model fitting took 0.61 seconds.
- Computing posterior matrices.
- Computation allocated took 0.01 seconds.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(U.m.Z.equal), las = 2, cex.names = 0.7, main='EZ')
Posterior:
Post = mash_compute_posterior_matrices_weights(U.m.Z.equal, TestdataZ.equal)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
U.m.Z.equal$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
For every sample, the posterior weights for the covariance structures:
Weight = matrix(0,nrow=nrow(U.m.Z.equal$result$PosteriorMean),17)
Weight[,1] = posterior_weights[,1]
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_1"))
Weight[,2] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_2"))
Weight[,3] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_3"))
Weight[,4] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_4"))
Weight[,5] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_PCA_5"))
Weight[,6] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "ED_tPCA"))
Weight[,7] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "identity"))
Weight[,8] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_1"))
Weight[,9] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_2"))
Weight[,10] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_3"))
Weight[,11] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_4"))
Weight[,12] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "condition_5"))
Weight[,13] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "equal_effects"))
Weight[,14] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_1"))
Weight[,15] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_2"))
Weight[,16] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
ind = as.logical(str_count(colnames(posterior_weights), "simple_het_3"))
Weight[,17] = apply(posterior_weights[,ind, drop=FALSE], 1, sum)
colnames(Weight) = c('null', 'ED_PCA_1','ED_PCA_2','ED_PCA_3','ED_PCA_4','ED_PCA_5','ED_tPCA',
'identity','condition_1', 'condition_2','condition_3','condition_4','condition_5',
'equal_effects', 'simple_het_1', 'simple_het_2', 'simple_het_3')
row.names(Weight) = row.names(U.m.Z.equal$result$PosteriorMean)
Identify the covariance structure with the max posterior weight for each sample:
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The posterior weights for the covariance structures have similar pattern to the weights in mash
model.
set.seed(1)
simdata.diff = simple_sims(500, 5, rep(c(0.5,0.4,5,1,1), 400))
simdata.diff$z = simdata.diff$Bhat/simdata.diff$Shat
# set mash data
TestdataBeta.diff = set_mash_data(Bhat=simdata.diff$Bhat,
Shat=simdata.diff$Shat,
alpha = 0)
# Create covariance matrices
# center
TestdataZ.diff.center = set_mash_data(Bhat = apply(as.matrix(simdata.diff$z), 2, function(x) x - mean(x)), alpha=0)
# canonical cov
U.c.diff = cov_canonical(TestdataZ.diff.center)
# data_driven
m.1by1.Z.diff = mash_1by1(TestdataBeta.diff, alpha=1)
strong.Z.diff = get_significant_results(m.1by1.Z.diff,0.05)
U.pca.Z.diff = cov_pca(TestdataZ.diff.center,5,strong.Z.diff)
U.ed.beta.diff = cov_ed(TestdataBeta.diff, U.pca.Z.diff, strong.Z.diff)
U.m.beta.diff = mash(TestdataBeta.diff, c(U.c.diff, U.ed.beta.diff))
- Computing 2000 x 337 likelihood matrix.
- Likelihood calculations took 0.60 seconds.
- Fitting model with 337 mixture components.
- Model fitting took 0.80 seconds.
- Computing posterior matrices.
- Computation allocated took 0.09 seconds.
barplot(get_estimated_pi(U.m.beta.diff), las = 2, cex.names = 0.7, main='EE')
Post = mash_compute_posterior_matrices_weights(U.m.beta.diff, TestdataBeta.diff)
U.m.beta.diff$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The posterior weights for the covariance structures have similar pattern to the weights in mash
model.
mash
model based on null data
→ estimated weights πnull data
using the mash
model → posterior weights for the covariance structures; Obtain posterior mean for max data
using the mash
model → posterior weights for the covariance structuresmash
model.data = readRDS('~/Documents/GitHub/mash-application-immune/data/ImmuneQTLSummary.4MASH.rds')
data$max$se = data$max$beta/data$max$z
data$null$se = data$null$beta / data$null$z
V = cor(data$null$z[which(apply(abs(data$null$z),1, max) < 2),])
mash_data = set_mash_data(Bhat = data$null$beta,
Shat = data$null$se,
alpha=1,
V = V)
resEZ = readRDS('~/Documents/GitHub/mash-application-immune/output/ImmuneEZ.V1.center.mash_model.K10.P5.rds')
Post = mash_compute_posterior_matrices_weights(resEZ, mash_data)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
resEZ$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
barplot(get_estimated_pi(resEZ), las = 2, cex.names = 0.7)
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
Da_summary
# A tibble: 10 x 2
Freq Total
<int> <int>
1 1 187216
2 4 20
3 7 193
4 8 5698
5 9 6
6 15 35
7 21 2
8 23 126
9 25 1
10 26 4
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
The number of genes with high posterior weights on equal_effects
are small.
common = Reduce(intersect, list(get_significant_results(resEZ, conditions=1),
get_significant_results(resEZ, conditions=2),
get_significant_results(resEZ, conditions=3),
get_significant_results(resEZ, conditions=4),
get_significant_results(resEZ, conditions=5),
get_significant_results(resEZ, conditions=6),
get_significant_results(resEZ, conditions=7)))
PM.sign = sign(resEZ$result$PosteriorMean[common,])
all(PM.sign - PM.sign[,1] == 0)
[1] TRUE
There is no qualitative interaction cases in the null set.
mash_data = set_mash_data(Bhat = data$max$beta,
Shat = data$max$se,
alpha=1,
V = V)
resEZ = readRDS('~/Documents/GitHub/mash-application-immune/output/ImmuneEZ.V1.center.mash_model.K10.P5.rds')
Post = mash_compute_posterior_matrices_weights(resEZ, mash_data)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
resEZ$result = Post$posterior_matrices
posterior_weights = Post$posterior_weights
Freqe = apply(Weight,1, which.max)
Da = data.frame(id = 1:length(Freqe), Freq = Freqe)
Da_summary = Da %>% group_by(Freq) %>% summarise(Total=n())
Da_summary
# A tibble: 11 x 2
Freq Total
<int> <int>
1 1 12003
2 4 90
3 7 1470
4 8 6695
5 9 24
6 12 11
7 15 640
8 21 52
9 23 459
10 25 18
11 26 23
x = Da_summary$Total
names(x) = colnames(Weight)[Da_summary$Freq]
barplot(x, las = 2, cex.names = 0.7)
There are some genes with high posterior weights on ED_tFlash
and identity
.
Posterior weights for the 4 genes have effects in different directions in different conditions, among the eQTLs that significant among all treatments:
common = Reduce(intersect, list(get_significant_results(resEZ, conditions=1),
get_significant_results(resEZ, conditions=2),
get_significant_results(resEZ, conditions=3),
get_significant_results(resEZ, conditions=4),
get_significant_results(resEZ, conditions=5),
get_significant_results(resEZ, conditions=6),
get_significant_results(resEZ, conditions=7)))
par(mfrow=c(2,2))
for(i in c(3052, 4156, 4303, 4726)){
barplot(Weight[common,][i, ], las = 2, cex.names = 0.7, main=row.names(Weight[common])[i])
}
par(mfrow=c(1,1))
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.2.0 dplyr_0.7.4 corrplot_0.84 mashr_0.2-4 ashr_2.1-27
loaded via a namespace (and not attached):
[1] Rcpp_0.12.14 compiler_3.4.3
[3] git2r_0.19.0 plyr_1.8.4
[5] bindr_0.1 iterators_1.0.8
[7] tools_3.4.3 digest_0.6.12
[9] evaluate_0.10.1 tibble_1.3.4
[11] lattice_0.20-35 pkgconfig_2.0.1
[13] rlang_0.1.2 Matrix_1.2-12
[15] foreach_1.4.3 yaml_2.1.14
[17] parallel_3.4.3 mvtnorm_1.0-6
[19] bindrcpp_0.2 knitr_1.17
[21] REBayes_0.85 rprojroot_1.2
[23] grid_3.4.3 glue_1.1.1
[25] R6_2.2.2 rmarkdown_1.7
[27] rmeta_2.16 magrittr_1.5
[29] backports_1.1.1 codetools_0.2-15
[31] htmltools_0.3.6 MASS_7.3-47
[33] assertthat_0.2.0 stringi_1.1.5
[35] Rmosek_8.0.69 pscl_1.5.2
[37] doParallel_1.0.11 truncnorm_1.0-7
[39] SQUAREM_2017.10-1 ExtremeDeconvolution_1.3
This R Markdown site was created with workflowr