DSC configuration script


# Covariance Estimation
# Wei Wang and Gao Wang (c) 2016, 2017
#
# Data
#

diagonal:
  exec: diagonal.R
  seed: R(1:20)
  params:
    n: 200
    P: 10, 100, 300, 1000
  return: Xtrain = R(out$Xtrain),
          Xtest = R(out$Xtest),
          Omega_True = R(out$Omega)

toeplitz(diagonal):
  exec: toeplitz.R

sparse_edors_renyi_graph(diagonal):
  exec: graph.R
  .alias: sparse_edors_renyi_graph
  params:
    g_type: NULL
    para: NULL

dense_edors_renyi_graph(sparse_edors_renyi_graph):
  .alias: dense_edors_renyi_graph
  params:
    para: 0.5

band_graph(sparse_edors_renyi_graph):
  .alias: band_graph
  params:
    g_type: band
    para: 3

cluster_graph(sparse_edors_renyi_graph):
  .alias: cluster_graph
  params:
    g_type: cluster
    para: 5

hub_graph(sparse_edors_renyi_graph):
  .alias: hub_graph
  params:
    g_type: hub
    para: 5

scale_free_graph(sparse_edors_renyi_graph):
  .alias: scale_free_graph
  params:
    g_type: scale-free

#
# Methods
#

inv_cov:
  exec: inv_cov.R
  params:
    X: $Xtrain
  return: Omega

inv_diag(inv_cov):
  exec: inv_diag.R

glasso(inv_cov):
  exec: glasso.R

tiger(inv_cov):
  exec: tiger.R

clime(inv_cov):
  exec: clime.R

flash(inv_cov):
  exec: flash.R
  params:
    est_K: max
    Psi_type: diag

hybrid_flash(flash):
  .alias: hybrid_flash
  params:
    est_K: bcv
    Psi_type: sparse

SFAmix(flash):
  exec: SFAmix.R
  params:
    sfain: File(sfa)

hybrid_SFAmix(SFAmix):
  .alias: hybrid_SFAmix
  params:
    est_K: bcv
    Psi_type: sparse

#
# Scores
#

score:
  exec: loglik.R, hyvarinen.R, mse.R
  params:
    Xtest: $Xtest
    Omega: $Omega
  return:
    exec[1]: score = loglik
    exec[2]: score = Hscore
    exec[3]: score = MSE

DSC:
  run:
    all: (diagonal, toeplitz, sparse_edors_renyi_graph, dense_edors_renyi_graph,
          band_graph, cluster_graph, hub_graph, scale_free_graph) * (inv_cov, inv_diag, glasso,
          tiger, clime, SFAmix, flash, hybrid_SFAmix, hybrid_flash) * score
  output: output/omega
  exec_path: src/datamaker, src/method, src/score
  lib_path: src/lib
  R_libs: MASS, huge, camel, cate, stephenslab/flashr, REBayes

From src/lib

est_camel = function(X,type = c("slasso", "clime")){
  out1 = camel::camel.tiger(X,method = type)
  #model selection using cross validation
  out.select = camel::camel.tiger.select(out1, criterion = "cv")
  if (!is.null(out.select$opt.icov))
    Omega = as.matrix(out.select$opt.icov)
  else
    Omega = NULL
  Omega
}


est_invPsi = function(res=NULL,sigma = NULL, type = c("diag","sparse")){
  if(type == "diag"){
    InvPsi = diag(sigma^(-1))
  }else{
    # here I use tiger, we can also use glasso
    out1 = camel::camel.tiger(res, method = "slasso")
    #model selection using cross validation
    out.select = camel::camel.tiger.select(out1, criterion = "cv")
    if (!is.null(out.select$opt.icov))
      InvPsi = as.matrix(out.select$opt.icov)
    else
      InvPsi = NULL

  }
  return(InvPsi)
}


est_rank = function(X,type = c("max","bcv")){
  if(type == "max"){
    K = min(dim(X))
  }else{
    K_list = cate::est.factor.num(X, method = c("bcv"))
    K = as.numeric(K_list$r)
  }
  return(K)
}


est_omega_wood = function(n,P,Lhat,Fhat,invPsi,type){
  if(type == "inv_L"){
    invC=solve( (1/n) * ( t(Lhat) %*% Lhat ) )
  }else{
    invC = as.vector((diag((1/n) * ( t(Lhat) %*% Lhat )))^(-1))
    if(length(invC) == 1){
      invC = matrix(invC,nrow = 1,ncol = 1)
    }else{
      invC = diag(invC)
    }
  }
  # based on woodbury matrix identity
  Omega = invPsi - invPsi %*% Fhat %*% solve(invC + t(Fhat) %*% invPsi %*% Fhat) %*% t(Fhat) %*% invPsi
  return(Omega)
}


DM_diagonal = function(n,P){
  index1=sort(sample(seq(1:n),(n/2)))
  index2=seq(1:n)[-index1]
  Sigma = diag(rchisq(P,3))
  data = MASS::mvrnorm(n,rep(0,P),Sigma)
  Xtest = data[index2,]
  Xtrain = data[index1,]
  Omega = diag((1/rchisq(P,3)))
  return(list(Xtrain = Xtrain, Xtest = Xtest, Omega = Omega))
}

DM_toeplitz = function(n,P){
  index1=sort(sample(seq(1:n),(n/2)))
  index2=seq(1:n)[-index1]
  Sigmatp=function(P){
    a=array(0,dim=c(P,P))
    for(i in 1:P){
      for(j in 1:P){
        a[i,j]=max(1-0.1*(abs(i-j)),0)
      }
    }
    return(a)
  }
  Sigma = Sigmatp(P)
  data = MASS::mvrnorm(n,rep(0,P),Sigma)
  Xtest = data[index2,]
  Xtrain = data[index1,]
  Omega = solve(Sigma)
  return(list(Xtrain = Xtrain, Xtest = Xtest, Omega = Omega))
}

DM_graph = function(n,P,g_type = NULL,para = NULL){
  index1=sort(sample(seq(1:n),(n/2)))
  index2=seq(1:n)[-index1]
  if(is.null(g_type)){
    # default is random graph
    data_list = huge::huge.generator(n = n, d = P, prob = para, vis = TRUE)
  }else{
    data_list = huge::huge.generator(n = n, d = P, graph = g_type, g = para, vis = TRUE)
  }
  data = data_list$data
  Xtest = data[index2,]
  Xtrain = data[index1,]
  Omega = data_list$omega
  return(list(Xtrain = Xtrain, Xtest = Xtest, Omega = Omega))
}

diagonal

source('simulation.R')
out = DM_diagonal(n,P)

toeplitz

source("simulation.R")
out = DM_toeplitz(n,P)

sparse_edors_renyi_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

dense_edors_renyi_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

band_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

cluster_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

hub_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

scale_free_graph

source('simulation.R')
out = DM_graph(n,P,g_type,para)

inv_diag

inv_diag = function(X){
  Omega = diag( (diag( cov(X) ))^(-1) )
  return(Omega)
}
Omega = inv_diag(X)

glasso

est_glasso = function(X){
  out.glasso = huge::huge(X, method = "glasso")
  out.select = huge::huge.select(out.glasso,criterion = "ric")
  if (!is.null(out.select$opt.icov))
    Omega = as.matrix(out.select$opt.icov)
  else
    Omega = NULL
  return(Omega)
}
Omega = est_glasso(X)

tiger

source('est_camel.R')
Omega = est_camel(X, "slasso")

clime

source('est_camel.R')
Omega = est_camel(X, "clime")

SFAmix

source('est_rank.R')
source('est_inv_psi.R')
source('est_woodbury.R')
est_sfamix = function(X,type = c("diag_L","inv_L"),
                      est_K = c("max","bcv"), Psi_type = c("diag","sparse"),
                      sfain = "dir_path"){
  K = est_rank(X,type = est_K)
  # write the data into a file which is need in sfamix
  write.table(X, file=sfain, row.names=F,col.names=F)
  tmp_dir = dirname(sfain)
  system(paste("SFAmix","--nf", K, "--y", sfain, "--out",
               tmp_dir,"--sep","space", "> /dev/null", sep=" "))
  # get the result of SFAmix
  alpha=as.matrix(read.table(paste0(tmp_dir, "/PSI")))
  # construct the precision matrix
  if(file.info(paste0(tmp_dir, "/EX"))$size == 1){
    # when the EX are empty which means not factors
    res = X
    Omega = est_invPsi(res=res,sigma = as.vector(alpha), type = Psi_type)
  }else{
    # EX = read.table(paste0(tmp_dir, "/EX")) #L
    # Lambda = read.table(paste0(tmp_dir, "/LAM")) #F
    Lhat = as.matrix(read.table(paste0(tmp_dir, "/EX"))) # n by K
    Fhat = as.matrix(read.table(paste0(tmp_dir, "/LAM"))) # K by P
    Fhat = t(Fhat) # P by K
    res = X - Lhat %*% t(Fhat)
    # to get the inverse of Psi
    invPsi = est_invPsi(res=res,sigma = as.vector(alpha), type = Psi_type)
    # to get the inverse of cov of Lhat
    n = dim(X)[1]
    P = dim(X)[2]
    # based on woodbury matrix identity
    Omega = est_omega_wood(n,P,Lhat,Fhat,invPsi,type)
  }
  return(Omega)
}

Omega = est_sfamix(X, type = "diag_L", est_K = est_K, Psi_type = Psi_type, sfain = sfain)

flash

source('est_rank.R')
source('est_inv_psi.R')
source('est_woodbury.R')
est_flash = function(X,type = c("diag_L","inv_L"),
                     est_K = c("max","bcv"), Psi_type = c("diag","sparse")){
  K = est_rank(X,type = est_K)
  if(K == 1){
    g_flash = flashr::flash(X, partype = "var_col")
  }else{
    g_flash = flashr::greedy(X, K, flash_para = list(partype = "var_col"))
  }
  n = dim(X)[1]
  P = dim(X)[2]
  if(sum(g_flash$l) == 0 || sum(g_flash$f) ==0){
    res = X
    invPsi = est_invPsi(res=res, sigma = as.vector(g_flash$sigmae2), type = Psi_type)
    Omega=invPsi
  } else{
    Lhat = g_flash$l
    Fhat = g_flash$f
    res = X - Lhat %*% t(Fhat)
    invPsi = est_invPsi(res=res,sigma = as.vector(g_flash$sigmae2), type = Psi_type)
    Omega = est_omega_wood(n,P,Lhat,Fhat,invPsi,type)
  }
  return(Omega)
}
Omega = est_flash(X, type = "diag_L", est_K = est_K, Psi_type = Psi_type)

hybrid_SFAmix

source('est_rank.R')
source('est_inv_psi.R')
source('est_woodbury.R')
est_sfamix = function(X,type = c("diag_L","inv_L"),
                      est_K = c("max","bcv"), Psi_type = c("diag","sparse"),
                      sfain = "dir_path"){
  K = est_rank(X,type = est_K)
  # write the data into a file which is need in sfamix
  write.table(X, file=sfain, row.names=F,col.names=F)
  tmp_dir = dirname(sfain)
  system(paste("SFAmix","--nf", K, "--y", sfain, "--out",
               tmp_dir,"--sep","space", "> /dev/null", sep=" "))
  # get the result of SFAmix
  alpha=as.matrix(read.table(paste0(tmp_dir, "/PSI")))
  # construct the precision matrix
  if(file.info(paste0(tmp_dir, "/EX"))$size == 1){
    # when the EX are empty which means not factors
    res = X
    Omega = est_invPsi(res=res,sigma = as.vector(alpha), type = Psi_type)
  }else{
    # EX = read.table(paste0(tmp_dir, "/EX")) #L
    # Lambda = read.table(paste0(tmp_dir, "/LAM")) #F
    Lhat = as.matrix(read.table(paste0(tmp_dir, "/EX"))) # n by K
    Fhat = as.matrix(read.table(paste0(tmp_dir, "/LAM"))) # K by P
    Fhat = t(Fhat) # P by K
    res = X - Lhat %*% t(Fhat)
    # to get the inverse of Psi
    invPsi = est_invPsi(res=res,sigma = as.vector(alpha), type = Psi_type)
    # to get the inverse of cov of Lhat
    n = dim(X)[1]
    P = dim(X)[2]
    # based on woodbury matrix identity
    Omega = est_omega_wood(n,P,Lhat,Fhat,invPsi,type)
  }
  return(Omega)
}

Omega = est_sfamix(X, type = "diag_L", est_K = est_K, Psi_type = Psi_type, sfain = sfain)

hybrid_flash

source('est_rank.R')
source('est_inv_psi.R')
source('est_woodbury.R')
est_flash = function(X,type = c("diag_L","inv_L"),
                     est_K = c("max","bcv"), Psi_type = c("diag","sparse")){
  K = est_rank(X,type = est_K)
  if(K == 1){
    g_flash = flashr::flash(X, partype = "var_col")
  }else{
    g_flash = flashr::greedy(X, K, flash_para = list(partype = "var_col"))
  }
  n = dim(X)[1]
  P = dim(X)[2]
  if(sum(g_flash$l) == 0 || sum(g_flash$f) ==0){
    res = X
    invPsi = est_invPsi(res=res, sigma = as.vector(g_flash$sigmae2), type = Psi_type)
    Omega=invPsi
  } else{
    Lhat = g_flash$l
    Fhat = g_flash$f
    res = X - Lhat %*% t(Fhat)
    invPsi = est_invPsi(res=res,sigma = as.vector(g_flash$sigmae2), type = Psi_type)
    Omega = est_omega_wood(n,P,Lhat,Fhat,invPsi,type)
  }
  return(Omega)
}
Omega = est_flash(X, type = "diag_L", est_K = est_K, Psi_type = Psi_type)

inv_cov

inv_cov = function(X){
  Omega = tryCatch({
    solve(cov(X))
  }, error = function(e) {
    return(NA)
  })
  return(Omega)
}
Omega = inv_cov(X)

score

if (is.null(Omega)) {
  loglik=NA
} else if(any(is.na(Omega))||any(Omega==Inf)){
    loglik = NA
} else {
    S = cov(Xtest)
    tryCatch({
      evlsig= eigen(Omega)$values
      evlsig = evlsig * (1 *( evlsig > 1e-9))
      f1=sum(log(evlsig))
      f2=sum(diag(S %*% Omega))
      loglik=f1-f2
    }, error = function(e) {
      loglik=NA
    })
}


if (is.null(Omega)) {
  Hscore = NA
} else if(any(is.na(Omega))||any(Omega==Inf)){
    Hscore = NA
} else {
    S = cov(Xtest)
    Hscore = (1/2) * sum(diag(Omega %*% S %*% Omega)) - sum(diag(Omega))
}


if (is.null(Omega)) {
  MSE=NA
} else if(any(is.na(Omega))||any(Omega==Inf)){
        MSE = NA
} else {
    nt = dim(Xtest)[1]
    Pt = dim(Xtest)[2]
    MSE=0
        for(i in 1:nt){
           for(j in 1:Pt){
                MSE = MSE + ( Xtest[i,j] + (Omega[j,j])^(-1)*(sum(Omega[,j]*Xtest[i,])-Omega[j,j]*Xtest[i,j]) )^2
           }
        }
}