Last updated: 2017-01-15
Code version: b05decc05714ddcb20aa04e56b557d5123d178fb
This file makes two figures:
Illustrates basic idea of way q value does FDR analysis in p value space.
Compares methods in the way they decompose p values and z scores into two groups.
First, we load the necessary libraries and function definitions.
library(ashr)
library(qvalue)
library(locfdr)
library(mixfdr)
source("../R/plot_FDReg_hist.R")
source("../R/nullalthist.R")
Generate a small data set, and run the different methods on this data set.
# ncz is the number of bins in the z score histograms.
ncz = 100
ntest = 10000
set.seed(111)
# Simulate (with all tests alternative, true effects $\beta \sim N(0,1)$).
beta = rnorm(ntest)
sebetahat = 1
betahat = beta + rnorm(ntest,0,sebetahat)
zscore = betahat/sebetahat
pval = pchisq(zscore^2,df = 1,lower.tail = FALSE)
# Apply the different methods.
res.qvalue = qvalue(pval)
res.locfdr = locfdr(zscore,nulltype = 0,plot = 0)
res.mixfdr = mixFdr(zscore,noiseSD = 1,theonull = TRUE,plot = FALSE)
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE):
Assuming known noise noiseSD = 1 . If needed rerun with noiseSD = NA to fit
noiseSD.
Fitting preliminary model
Fitting final model
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE): Null
proportion pi0 is small. Consider increasing penalization and/or using an
empirical null.
Warning in mixFdr(zscore, noiseSD = 1, theonull = TRUE, plot = FALSE):
Using an empirical null with a fitted noiseSD gives a substantially
different model. Consider rerunning with theonull = FALSE and noiseSD = NA.
Fitted Model: J = 3 groups
----------------------------
null? TRUE FALSE FALSE
pi = 0.8392 0.0797 0.0811
mu = 0.000 2.161 -2.121
sigma = 1 1 1
noiseSD = 1
res.ash = ash(betahat,1,method = "fdr")
# Roughly compute a local fdr for qvalue (to aid the plotting of the
# decomposition in zscore space) in each bin of histogram. I set the
# threshold at 1 so that fdr is never larger than 1.
temp = hist(pval,nclass = 50)
bin_fdr = res.qvalue$pi0/temp$density
qval_fdr = bin_fdr[as.numeric(cut(pval,temp$breaks))]
qval_fdr = pmin(1,qval_fdr)
qval_fdr = qvalue::lfdr(pval)
Here’s an example of how qvalue decomposes \(p\) values into null and alternative components. First the \(p\) values:
plot_FDReg_hist(pval,res.qvalue$pi0,type = 1,title = "p values",
cex.axis = 0.8,cex.main = 0.8)
Now plot the decomposition:
plot_FDReg_hist(pval,res.qvalue$pi0,type = 4,cex.axis = 0.8,yaxt = 'n',
textsize = 0.9,cex.main = 0.8,
title = "Decomposition into null/alternative")
axis(side = 2,labels = FALSE,tck = -0.01)
Show the distribution of the p-values and the z-scores, stratified by their classification into the “null” and “alternative”.
layout(matrix(1:12,ncol = 3,byrow = FALSE))
plotlabel = function(label,cex = 1.5){
plot(0,0,type = "n",axes = FALSE,xlab = "",ylab = "")
text(0,0,label,cex = cex)
}
plotlabel("qvalue")
plotlabel("locfdr")
plotlabel("mixfdr")
plotlabel("ash")
par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
# p value histograms
altnullhist(pval,qval_fdr,main = "p values",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = FALSE,tck = -0.01)
altnullhist(pval,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',cex.axis = 0.8)
axis(side = 1, labels = TRUE,tck = -0.01,cex.axis = 0.8)
# z score histograms
nullalthist(zscore,qval_fdr,main = "z scores",ncz = 50,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.locfdr$fdr,main = "",ncz = 50,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.mixfdr$fdr,main = "",ncz = 50,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,get_lfdr(res.ash),main = "",ncz = 50,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
This one is a different layout (4 columns, 2 rows) for my Tukey poster
# pdf("../figures/decomp_ZA_poster.pdf",width=6,height=2)
layout(matrix(1:8,ncol = 4,byrow = TRUE))
par(mar = c(1.3,2,1.5,0.2),mgp = c(3, 0.5, 0))
ncz = 25
# p value histograms
altnullhist(pval,qval_fdr,main = "p values: qvalue",ncz = ncz,
xaxt = 'n',cex.axis = 0.8)
# plot_FDReg_hist(pval,res.qvalue$pi0,type = 2,title = "p values",
# xaxt = 'n',cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
# mtext(side = 3,"p values",line = 1)
altnullhist(pval,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
altnullhist(pval,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
cex.axis = 0.8)
axis(side = 1,labels = FALSE,tck = -0.01)
altnullhist(pval,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
cex.axis = 0.8)
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
# mtext(side = 1,"p values",line = 2)
#z score histograms
nullalthist(zscore,qval_fdr,main = "z scores: qvalue",ncz = ncz,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
# mtext(side = 3,"z scores",line = 1)
nullalthist(zscore,res.locfdr$fdr,main = "locfdr",ncz = ncz,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,res.mixfdr$fdr,main = "mixfdr",ncz = ncz,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = FALSE,tck = -0.01)
nullalthist(zscore,get_lfdr(res.ash),main = "ash",ncz = ncz,xaxt = 'n',
cex.axis = 0.8,ylim = c(0,0.3),xlim = c(-6,6))
axis(side = 1,labels = TRUE,tck = -0.01,cex.axis = 0.8)
# mtext(side = 1,"z scores",line = 2)
# dev.off()
sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] knitr_1.15.1 mixfdr_1.0 locfdr_1.1-8 qvalue_2.4.2 ashr_2.0.5
loaded via a namespace (and not attached):
[1] Rcpp_0.12.8 magrittr_1.5 REBayes_0.63
[4] splines_3.3.2 MASS_7.3-45 munsell_0.4.3
[7] doParallel_1.0.10 pscl_1.4.9 colorspace_1.2-6
[10] SQUAREM_2016.8-2 lattice_0.20-34 foreach_1.4.3
[13] plyr_1.8.4 stringr_1.1.0 tools_3.3.2
[16] parallel_3.3.2 grid_3.3.2 gtable_0.2.0
[19] htmltools_0.3.5 iterators_1.0.8 assertthat_0.1
[22] lazyeval_0.2.0 yaml_2.1.14 rprojroot_1.1
[25] digest_0.6.10 tibble_1.2 Matrix_1.2-7.1
[28] reshape2_1.4.2 ggplot2_2.2.0 codetools_0.2-15
[31] evaluate_0.10 rmarkdown_1.3 stringi_1.1.2
[34] Rmosek_7.1.3 scales_0.4.1 backports_1.0.4
[37] truncnorm_1.0-7