Set up the data

# Load required packages
library(mashr); library(ExtremeDeconvolution); library(flashr2)

Loading required package: ashr

# Read the immune QTL summary statistics
data <- readRDS("../data/ImmuneQTLSummary.4MASH.rds")

# Recover standard errors from the effect sizes and z-scores (se = beta / z)
data$max$se <- data$max$beta / data$max$z
data$null$se <- data$null$beta / data$null$z

# Analysis settings
# NOTE(review): K is not referenced in the code shown on this page;
# presumably the number of flash factors -- confirm.
K <- 10
# Number of principal components passed to cov_pca()
P <- 5
# vhat == 1: estimate V from the null z-scores; otherwise V is the identity
vhat <- 1

We estimate the covariance using column-centered Z scores

# Column-center the "max" z-scores: subtract each column's mean from it
D.center <- apply(
  X = as.matrix(data$max$z),
  MARGIN = 2,
  FUN = function(column) column - mean(column)
)

# mash data object built from the centered z-scores
mash_data_center <- mashr::set_mash_data(Bhat = as.matrix(D.center))

Generate covariance matrices for each row

From Flash, we have \[\tilde{Z} = LF' + E\] where F is \(7 \times K\), L is \(n \times K\), E is \(n\times7\).

\[F = \left( \begin{array}{c c c c} f_{1} & f_{2} & \cdots & f_{K} \end{array}\right)_{p\times K}\] with \(p = 7\). For each gene \(i\), \[z_{i} = \sum_{k=1}^{K}l_{ik} f_{k},\] and the rank-one covariance matrix \[U_{i} = z_{i}z_{i}'\] captures the pattern of effects in the \(i\)th gene.

# Load the saved flash fit (the file name indicates centered data, greedy, K10)
FlashResult <- readRDS('~/Documents/GitHub/mash-application-immune/output/Immune.flash2.center.greedy.K10.rds')
n <- nrow(FlashResult$L_flash)

# Fitted values Z = L F': row i is z_i = sum_k l_ik f_k.
# Computed as one matrix product, so the number of columns follows from
# F_flash instead of being hard-coded to 7.
Z <- tcrossprod(FlashResult$L_flash, FlashResult$F_flash)

# One rank-one covariance matrix U_i = z_i z_i' per row of Z.
# seq_len() keeps this well-defined (an empty list) when n is 0.
U <- lapply(seq_len(n), function(i) {
  zi <- Z[i, ]
  zi %*% t(zi)
})

Flash_res <- flash_get_lf(FlashResult$f)

# Combine: the per-row matrices, the covariances that cov_from_factors()
# builds from the first five flash factors (transposed so each factor is a
# row), and the cross-product of Flash_res scaled by the number of rows of
# data$max$z.
# NOTE(review): the first n elements of U.flash are unnamed -- confirm that
# downstream mashr calls do not rely on names.
U.flash <- c(
  U,
  mashr::cov_from_factors(t(as.matrix(FlashResult$F_flash[, 1:5])), "Flash"),
  list("tFlash" = t(Flash_res) %*% Flash_res / nrow(data$max$z))
)

# PCA-based covariance matrices from the centered data, using P components
U.pca <- cov_pca(mash_data_center, P)

# Data-driven matrices: the flash-based and PCA-based covariances plus the
# empirical second-moment matrix of the centered z-scores ("XX")
U.dd <- c(
  U.flash,
  U.pca,
  list("XX" = t(D.center) %*% D.center / nrow(data$max$z))
)

# mash data object built from the uncentered "max" z-scores
mash_data <- mashr::set_mash_data(Bhat = as.matrix(data$max$z))

# failed
# U.ed = cov_ed(mash_data, U.dd)

The refinement step (`cov_ed`) failed, so we instead try to use the unrefined covariance matrices directly in mash.

Generate canonical covariance matrices:

# Canonical covariance matrices
U.can <- cov_canonical(mash_data)

# V: correlation of the null z-scores, using only rows whose largest
# absolute z-score is below 2, when vhat == 1; otherwise the identity matrix
V <- if (vhat == 1) {
  local({
    null_z <- data$null$z
    row_max_abs <- apply(abs(null_z), 1, max)
    cor(null_z[which(row_max_abs < 2), ])
  })
} else {
  diag(ncol(data$null$z))
}

# Rebuild mash_data from the null effects and standard errors, with V and
# alpha = 1 (this replaces the object created from the "max" z-scores)
mash_data <- mashr::set_mash_data(
  Bhat = as.matrix(data$null$beta),
  Shat = as.matrix(data$null$se),
  V = as.matrix(V),
  alpha = 1
)

# mash_model = mash(mash_data, c(U.can, U.dd), outputlevel = 1)

The method fails again. The size is too large.

mashr could not handle the optimization.

Session information

# Print the R version, platform and package versions used for this analysis
sessionInfo()

R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] flashr2_0.3-3            ExtremeDeconvolution_1.3
[3] mashr_0.2-4              ashr_2.1-27             

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14      compiler_3.4.3    git2r_0.20.0     
 [4] plyr_1.8.4        iterators_1.0.9   tools_3.4.3      
 [7] digest_0.6.13     evaluate_0.10.1   tibble_1.3.4     
[10] gtable_0.2.0      lattice_0.20-35   rlang_0.1.6      
[13] Matrix_1.2-12     foreach_1.4.4     yaml_2.1.16      
[16] parallel_3.4.3    mvtnorm_1.0-6     stringr_1.2.0    
[19] knitr_1.17        rprojroot_1.2     grid_3.4.3       
[22] rmarkdown_1.8     rmeta_2.16        ggplot2_2.2.1    
[25] magrittr_1.5      backports_1.1.2   scales_0.5.0     
[28] codetools_0.2-15  htmltools_0.3.6   MASS_7.3-47      
[31] assertthat_0.2.0  colorspace_1.3-2  stringi_1.1.6    
[34] lazyeval_0.2.1    pscl_1.5.2        doParallel_1.0.11
[37] munsell_0.4.3     truncnorm_1.0-7   SQUAREM_2017.10-1

This R Markdown site was created with workflowr

Immune Flash Covariance individual

Yuxin Zou

2017-12-09

Set up the data

Generate covariance matrices for each row

Session information