Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2017c.
1.0/zoneinfo/America/Chicago'
Last updated: 2017-11-17
Code version: 850ca41
library(lattice)
library(ggplot2)
library(colorRamps)
library(mashr)
Loading required package: ashr
library(corrplot)
corrplot 0.84 loaded
data = readRDS('../data/ImmuneQTLSummary.4MASH.rds')
data$max$se = data$max$beta/data$max$z
data$null$se = data$null$beta / data$null$z
K = 10
P = 5
vhat = 0
if (vhat == 1) {
V = cor(data$null$z[which(apply(abs(data$null$z),1, max) < 2),])
} else {
V = diag(ncol(data$null$z))
}
mash_data = mashr::set_mash_data(Bhat = as.matrix(data$max$beta),
Shat = as.matrix(data$max$se),
V = as.matrix(V),
alpha = 0)
The model is fitted in Mash model EE V0
# EE
resEE = readRDS('../output/ImmuneEE.V0.center.mash_model.K10.P5.rds')
resEE$result = readRDS('../output/ImmuneEE.V0.center.mash_posterior.K10.P5.rds')
The log-likelihood of fit is
get_loglik(resEE)
[1] 3165638
Here is a plot of weights learned.
options(repr.plot.width=12, repr.plot.height=4)
barplot(get_estimated_pi(resEE), las = 2, cex.names = 0.7)
Most of the mass is on the null and equal effects.
Here is a visualization for tPCA, which capture 1.4546% mixture component in these data, (via correlation heatmap):
x <- cov2cor(resEE$fitted_g$Ulist[["ED_tPCA"]])
x[x > 1] <- 1
x[x < -1] <- -1
colnames(x) <- colnames(get_lfsr(resEE))
rownames(x) <- colnames(x)
corrplot.mixed(x, tl.pos="d",upper='color',cl.lim=c(-1,1), upper.col=colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(40),
tl.cex=1.2)
Next we perform SVD on the tPCA covariance matrix, and plot the top eigen vector.
svd.out = svd(resEE$fitted_g$Ulist[["ED_tPCA"]])
v = svd.out$v
colnames(v) = colnames(get_lfsr(resEE))
rownames(v) = colnames(v)
options(repr.plot.width=10, repr.plot.height=5)
for (j in 1:1)
barplot(v[,j]/v[,j][which.max(abs(v[,j]))], cex.names = 0.7,
las = 2, main = paste0("EigenVector ", j, " for PCA-based covariance matrix"))
head(get_significant_results(resEE))
ILMN_1750368_rs6666277 ILMN_1778543_rs34289983 ILMN_1793724_rs303854
8306 10486 11586
ILMN_1812559_rs1075851 ILMN_2262288_rs1061093 ILMN_2408102_rs35262724
13048 16944 18702
mash
uses patterns of sharing to inform estimated effect:Original estimates
MASH
estimates
Here is one example of shinkage:
Original estimates
MASH
estimates
The estimated effects are closer to 0.
Pairwise sharing
x <- get_pairwise_sharing(resEE)
colnames(x) <- colnames(get_lfsr(resEE))
rownames(x) <- colnames(x)
x <- x[rev(rownames(x)),rev(colnames(x))]
x[lower.tri(x)] <- NA
clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(64)
n <- nrow(x)
options(repr.plot.width=9, repr.plot.height=9)
print(levelplot(x[n:1,],col.regions = clrs,xlab = "",ylab = "",
colorkey = TRUE, at = seq(0.7,1,length.out = 64),
scales = list(cex = 0.5,x = list(rot = 45))))
Among the 21485 top SNPs, MASH
found 10183 to be significant in at least one treatment. We refer to these as the ‘top eQTLs’.
Using MASH
, we found 9621 genes with an eQTL in control, 9439 genes with an eQTL in lps6h, 9740 genes with an eQTL in lps90, 9860 genes with an eQTL in mdp6h, 9846 genes with an eQTL in mdp90, 9746 genes with an eQTL in rna6h, 10051 genes with an eQTL in rna90.
In the original paper, they identified 717-1653 genes with an eQTL in each condition. So, we found more genes with an eQTL using MASH
.
There are 8725 top eQTLs with significant effects among all treatments.
Find genes having \(\beta_{Trt}\) significantly different from \(\beta_{Ctrl}\), among the top eQTLs. The number in [] is the result from the paper. Note that, there are only percentages provided in the paper. Since the number of top eQTLs we found are different, the percentage may not directly comparable.
subset.data = function(data, subset){
data.subset = data
data.subset$Bhat = data$Bhat[subset,]
data.subset$Shat = data$Shat[subset,]
data.subset$Shat_alpha = data$Shat_alpha[subset,]
data.subset
}
eQTL.index.lps6h = get_significant_results(resEE, conditions = 2)
A.lps6h = rbind(c(1,-1,0,0,0,0,0))
row.names(A.lps6h) = c('Ctrl-lps6h')
resEE.lps6h = resEE
eQTL.lps6h = subset.data(mash_data, eQTL.index.lps6h)
resEE.lps6h$result = mash_compute_posterior_matrices(resEE, eQTL.lps6h, A=A.lps6h, algorithm.version = 'R')
saveRDS(resEE.lps6h,
paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6h.K',K,'.P',P,'.rds'))
resEE.lps6h = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6h.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps6h))
[1] 510
Using MASH
, we found 5.4% [17%] of lps 6h eQTLs are reQTLs.
resEE.lps90 = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps90.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps90))
[1] 371
We found 3.81% [15%] of lps 90 eQTLs are reQTLs.
resEE.mdp6h = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.mdp6h.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp6h))
[1] 625
We found 6.34% [9%] of mdp 6h eQTLs are reQTLs.
resEE.mdp90 = readRDS(paste0('../output/ImmuneEE.V',vhat,'center.resEE.mdp90.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp90))
[1] 388
We found 3.94% [9%] of mdp 90 eQTLs are reQTLs.
resEE.rna6h = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna6h.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna6h))
[1] 610
We found 6.26% [18%] of rna 6h eQTLs are reQTLs.
resEE.rna90 = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna90.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna90))
[1] 329
We found 3.27% [3%] of rna 90 eQTLs are reQTLs.
In the paper, they found 3-18% of cis eQTLs in each condition are reQTLs.
reQTL.index.lps6h = get_significant_results(resEE.lps6h)
A.lps6hTRT = rbind(c(0,1,0,-1,0,0,0),
c(0,1,0,0,0,-1,0))
row.names(A.lps6hTRT) = c('lps6h-mdp6h', 'lps6h-rna6h')
resEE.lps6hTRT = resEE
reQTL.lps6h = subset.data(eQTL.lps6h, reQTL.index.lps6h)
resEE.lps6hTRT$result = mash_compute_posterior_matrices(resEE, reQTL.lps6h, A=A.lps6hTRT, algorithm.version = 'R')
saveRDS(resEE.lps6hTRT,
paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6hTRT.K',K,'.P',P,'.rds'))
resEE.lps6hTRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6hTRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps6hTRT))
[1] 324
We found 57.45% [32%] lps6h reQTLs are stimulus specific compared with mdp6h, 53.33% [34%] lps6h reQTLs are stimulus specific compared with rna6h.
resEE.lps90TRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps90TRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps90TRT))
[1] 339
We found 55.8% [14%] lps 90min reQTLs are stimulus specific compared with mdp 90min, 78.71% [51%] lps 90min reQTLs are stimulus specific compared with rna 90min.
resEE.mdp6hTRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.mdp6hTRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp6hTRT))
[1] 327
We found 47.04% [15%] mdp 6h reQTLs are stimulus specific compared with lps 6h, 16.16% [13%] mdp 6h reQTLs are stimulus specific compared with rna 6h.
resEE.mdp90TRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.mdp90TRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp90TRT))
[1] 346
We found 51.29% [15%] mdp 90min reQTLs are stimulus specific compared with lps 90min, 70.1% [46%] mdp 90min reQTLs are stimulus specific compared with rna 90min.
resEE.rna6hTRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna6hTRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna6hTRT))
[1] 297
We found 44.92% [21%] rna 6h reQTLs are stimulus specific compared with lps 6h, 21.31% [45%] rna 6h reQTLs are stimulus specific compared with mdp 6h.
resEE.rna90TRT = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna90TRT.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna90TRT))
[1] 230
We found 63.83% [38%] rna 90min reQTLs are stimulus specific compared with lps 90min, 47.11% [29%] rna 90min reQTLs are stimulus specific compared with mdp 90min.
A.lps6hTime = rbind(c(0,1,-1,0,0,0,0))
row.names(A.lps6hTime) = c('lps6h-lps90')
resEE.lps6hTime = resEE
resEE.lps6hTime$result = mash_compute_posterior_matrices(resEE, reQTL.lps6h , A=A.lps6hTime, algorithm.version = 'R')
saveRDS(resEE.lps6hTime,
paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6hTime.K',K,'.P',P,'.rds'))
resEE.lps6hTime = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps6hTime.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps6hTime))
[1] 335
We found 65.69% [45%] lps6h reQTLs are time point specific compared with lps90min.
resEE.lps90Time = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.lps90Time.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.lps90Time))
[1] 222
We found 59.84% [36%] lps 90min reQTLs are time point specific compared with lps6h.
resEE.mdp6hTime = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.mdp6hTime.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp6hTime))
[1] 366
We found 47.04% [40%] mdp 6h reQTLs are time point specific compared with mdp 90min.
resEE.mdp90Time = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.mdp90Time.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.mdp90Time))
[1] 210
We found 54.12% [38%] mdp 90min reQTLs time point specific compared with mdp 6h.
resEE.rna6hTime = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna6hTime.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna6hTime))
[1] 511
We found 83.77% [64%] rna 6h reQTLs are time point specific compared with rna 90min.
resEE.rna90Time = readRDS(paste0('../output/ImmuneEE.V',vhat,'.center.resEE.rna90Time.K',K,'.P',P,'.rds'))
length(get_significant_results(resEE.rna90Time))
[1] 186
We found 56.53% [32%] rna 90min reQTLs are time point specific compared with rna 6h.
sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] corrplot_0.84 mashr_0.2-4 ashr_2.1-27 colorRamps_2.3
[5] ggplot2_2.2.1 lattice_0.20-35
loaded via a namespace (and not attached):
[1] Rcpp_0.12.13 compiler_3.4.2 git2r_0.19.0
[4] plyr_1.8.4 iterators_1.0.8 tools_3.4.2
[7] digest_0.6.12 evaluate_0.10.1 tibble_1.3.4
[10] gtable_0.2.0 rlang_0.1.2 Matrix_1.2-11
[13] foreach_1.4.3 yaml_2.1.14 parallel_3.4.2
[16] mvtnorm_1.0-6 stringr_1.2.0 knitr_1.17
[19] rprojroot_1.2 grid_3.4.2 rmarkdown_1.7
[22] rmeta_2.16 magrittr_1.5 backports_1.1.1
[25] scales_0.5.0 codetools_0.2-15 htmltools_0.3.6
[28] MASS_7.3-47 assertthat_0.2.0 colorspace_1.3-2
[31] labeling_0.3 stringi_1.1.5 lazyeval_0.2.1
[34] munsell_0.4.3 doParallel_1.0.11 pscl_1.5.2
[37] truncnorm_1.0-7 SQUAREM_2017.10-1
This R Markdown site was created with workflowr