Last updated: 2017-01-15

Code version: b05decc05714ddcb20aa04e56b557d5123d178fb

This file makes two figures:

  1. Illustrates basic idea of way q value does FDR analysis in p value space.

  2. Compares methods in the way they decompose p values and z scores into two groups.

First, we load the necessary libraries and function definitions.

library(ashr)
library(qvalue)
library(locfdr)
library(mixfdr)
source("../R/plot_FDReg_hist.R")
source("../R/nullalthist.R")

Simple simulated example

Generate a small data set, and run the different methods on this data set.

# ncz is the number of bins in the z score histograms.
ncz   = 100
ntest = 10000
set.seed(111)

# Simulate (with all tests alternative, true effects $\beta \sim N(0,1)$).
beta      = rnorm(ntest)
sebetahat = 1
betahat   = beta + rnorm(ntest,0,sebetahat)
zscore    = betahat/sebetahat
pval      = pchisq(zscore^2,df = 1,lower.tail = FALSE)

# Apply the different methods.
res.qvalue = qvalue(pval)
res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
res.mixfdr = mixFdr(zscore,noiseSD = 1,theonull = TRUE,plot = FALSE)
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE):
Assuming known noise noiseSD = 1 . If needed rerun with noiseSD = NA to fit
noiseSD.
Fitting preliminary model 
Fitting final model 
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE): Null
proportion pi0 is small. Consider increasing penalization and/or using an
empirical null.
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE):
Using an empirical null with a fitted noiseSD gives a substantially
different model. Consider rerunning with theonull = FALSE and noiseSD = NA.

Fitted Model: J = 3 groups
----------------------------
null?        TRUE   FALSE   FALSE   

pi =        0.8392  0.0797  0.0811  

mu =         0.000   2.161  -2.121  

sigma =     1   1   1   

noiseSD =   1   
res.ash    = ash(betahat,1,method = "fdr")

# Roughly compute a local fdr for qvalue (to aid the plotting of the
# decomposition in zscore space) in each bin of histogram. I set the 
# threshold at 1 so that fdr is never larger than 1.
temp     = hist(pval,nclass = 50)

bin_fdr  = res.qvalue$pi0/temp$density 
qval_fdr = bin_fdr[as.numeric(cut(pval,temp$breaks))]
qval_fdr = pmin(1,qval_fdr)
qval_fdr = qvalue::lfdr(pval)

Here’s an example of how qvalue decomposes \(p\) values into null and alternative components. First the \(p\) values:

plot_FDReg_hist(pval,res.qvalue$pi0,type = 1,title = "p values",
                cex.axis = 0.8,cex.main = 0.8)

Now plot the decomposition:

plot_FDReg_hist(pval,res.qvalue$pi0,type = 4,cex.axis = 0.8,yaxt = 'n',
                textsize = 0.9,cex.main = 0.8,
                title = "Decomposition into null/alternative")
axis(side = 2,labels = FALSE,tck = -0.01)

Show the distribution of the p-values and the z-scores, stratified by their classification into the “null” and “alternative”.

layout(matrix(1:12,ncol = 3,byrow = FALSE))
plotlabel = function(label,cex = 1.5){
  plot(0,0,type = "n",axes = FALSE,xlab = "",ylab = "")
  text(0,0,label,cex = cex)
}
plotlabel("qvalue")
plotlabel("locfdr")
plotlabel("mixfdr")
plotlabel("ash")

par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))

# p value histograms
altnullhist(pval,qval_fdr,main = "p values",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = TRUE,tck = -0.01,cex.axis = 0.8)

# z score histograms
nullalthist(zscore,qval_fdr,main = "z scores",ncz = 50,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)

This one is a different layout (4 columns, 2 rows) for my Tukey poster

# pdf("../figures/decomp_ZA_poster.pdf",width=6,height=2)
layout(matrix(1:8,ncol = 4,byrow = TRUE))
par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
ncz = 25

# p value histograms
altnullhist(pval,qval_fdr,main = "p values: qvalue",ncz = ncz,
            xaxt = 'n',cex.axis = 0.8)
# plot_FDReg_hist(pval,res.qvalue$pi0,type = 2,title = "p values", 
#                 xaxt = 'n',cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
# mtext(side = 3,"p values",line = 1)
altnullhist(pval,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
altnullhist(pval,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
altnullhist(pval,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8)
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
# mtext(side = 1,"p values",line = 2)

#z score histograms
nullalthist(zscore,qval_fdr,main = "z scores: qvalue",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
# mtext(side = 3,"z scores",line = 1)
nullalthist(zscore,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
            cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)

# mtext(side = 1,"z scores",line = 2)
# dev.off()

Session information

sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] knitr_1.15.1 mixfdr_1.0   locfdr_1.1-8 qvalue_2.4.2 ashr_2.0.5  

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.8       magrittr_1.5      REBayes_0.63     
 [4] splines_3.3.2     MASS_7.3-45       munsell_0.4.3    
 [7] doParallel_1.0.10 pscl_1.4.9        colorspace_1.2-6 
[10] SQUAREM_2016.8-2  lattice_0.20-34   foreach_1.4.3    
[13] plyr_1.8.4        stringr_1.1.0     tools_3.3.2      
[16] parallel_3.3.2    grid_3.3.2        gtable_0.2.0     
[19] htmltools_0.3.5   iterators_1.0.8   assertthat_0.1   
[22] lazyeval_0.2.0    yaml_2.1.14       rprojroot_1.1    
[25] digest_0.6.10     tibble_1.2        Matrix_1.2-7.1   
[28] reshape2_1.4.2    ggplot2_2.2.0     codetools_0.2-15 
[31] evaluate_0.10     rmarkdown_1.3     stringi_1.1.2    
[34] Rmosek_7.1.3      scales_0.4.1      backports_1.0.4  
[37] truncnorm_1.0-7