Last updated: 2017-04-18
Code version: b3eafbf4187bdce919f7b35ef7505fa7a3095c8c
First, we load the necessary libraries and useful function definitions.
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(ashr)
library(ggplot2)
library(dscr)
library(REBayes)
## Loading required package: Matrix
library(ashr)
source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
source("../R/set_plot_colors.R")
Define a couple functions that compile CDF data in such a way that it is easy to plot the CDFs using ggplot2.
load("../output/dsc-shrink-files/res.RData")
# df is a list with components method,seed, scenario
# cdf is evaluated at x
# returns list of x,y,pi0, where cdf values are in y
get_cdf = function (df,dsc = dsc_shrink,x = seq(-6,6,length = 100),
homedir = "../code/dsc-shrink") {
m = df$method
if (m == "truth")
m = "ash.n"
temp = load_example(dsc,df$seed,df$scenario,m,homedir)
pi0 = temp$meta$pi0
if (df$method == "truth") {
s = dsc$scenarios[[df$scenario]]
galt = s$args$g
g = normalmix(c(pi0,(1 - pi0) * mixprop(galt)),
c(0,comp_mean(galt)),
c(0,comp_sd(galt)))
} else {
if (grepl("mixfdr",df$method)) {
temp$output$fitted_g = mixfdr2fitted.g(temp$output)$fitted.g
}
g = temp$output$fitted_g
}
return(data.frame(x = x,y = as.numeric(mixcdf(g,x)),pi0 = pi0))
}
plot_mean_cdf =
function(SEEDS,
PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
pi0filter = FALSE,...) {
PLOTNAMES = PLOTSCENARIOS
#set up dataframe with cdf for all methods and all datasets
df = expand.grid(seed = SEEDS,scenario = PLOTSCENARIOS,
method = PLOTMETHODS,stringsAsFactors = FALSE)
df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
if (pi0filter)
df.cdf %<>% filter(pi0 < 0.55 & pi0 > 0.45)
if (length(SEEDS) > 1)
df.cdf %<>% group_by(x,method,scenario) %>% summarise(y = mean(y))
df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
levels(df.cdf$scenario) = PLOTNAMES
return(ggplot(df.cdf,aes(x = x,y = y,color = method),...) + colScale +
geom_line(lwd = 1.5,alpha = 0.7) + facet_grid(.~scenario) +
theme(legend.position = "bottom"))
}
Show the CDFs for all methods and all scenarios, based on a single data set.
# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
plot_mean_cdf(1)
Show the CDFs for all scenarios, this time averaged over 100 simulated data sets.
# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth"),
pi0filter = TRUE)
Same as above, but with custom colours.
# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue",
"locfdr","mixfdr.tnull")
colScale <- scale_colour_manual(name = "method",values = myColors)
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","ash.u.s","ash.hu.s","truth"),
pi0filter = TRUE)
# Reset color scale for other plots.
source("../R/set_plot_colors.R")
The following plots have fewer methods and scenarios for more clarity
# These chunk options were used to create the figure for the paper:
# fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
plot_mean_cdf(1,PLOTMETHODS = c("ash.n","truth"),
PLOTSCENARIOS = c("spiky","near-normal","bimodal"))
Same as above, but this time the CDFs are averaged over 100 simulated data sets.
# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=6, crop=TRUE
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","truth"),
PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
pi0filter = TRUE)
Now with custom colours.
# These chunk options were used to create the figure for the paper:
# fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue","locfdr",
"mixfdr.tnull")
colScale <- scale_colour_manual(name = "method",values = myColors)
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","truth"),
PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
pi0filter = TRUE)
source("../R/set_plot_colors.R")
I wanted to add the npmle method (fit using the GLmix function) to the plot…
# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=4.5, fig.width=6, crop=TRUE
run_nplme = function (SEED = 1,PLOTSCENARIOS = c("spiky","near-normal",
"flat-top","skew","bimodal")) {
df = data.frame(seed = NULL,scenario = NULL,method = NULL,x = NULL,
y = NULL,pi0 = NULL)
for (SCENARIO in PLOTSCENARIOS) {
temp = load_example(dsc_shrink,SEED,SCENARIO,"ash.n","../output/dsc-shrink-files/")
z = GLmix(temp$input$betahat)
df = rbind(df,data.frame(seed = SEED,scenario = SCENARIO,method = "NPMLE",
x = z$x,y = cumsum(z$y)/sum(z$y),pi0 = NA))
}
return(df)
}
plot_mean_cdf_with_npmle =
function(SEED = 1,
PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
pi0filter = FALSE,...) {
if (length(SEED) > 1)
stop("plot with npmle only implemented for a single seed")
PLOTNAMES = PLOTSCENARIOS
# Set up dataframe with cdf for all methods and all datasets.
df = expand.grid(seed = SEED,scenario = PLOTSCENARIOS,method = PLOTMETHODS,
stringsAsFactors = FALSE)
df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
df.cdf$scenario = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
levels(df.cdf$scenario) = PLOTNAMES
df.npmle = run_nplme(SEED,PLOTSCENARIOS)
df.cdf = rbind(df.cdf,df.npmle)
p = ggplot(df.cdf,aes(x = x,y = y,color = method),...) +
xlim(c(-4,4)) + colScale + geom_line(lwd = 1.2,alpha = 0.7) +
theme(legend.position = "bottom")
return(p)
}
plot_mean_cdf_with_npmle(1,PLOTMETHODS = c("ash.hu","truth"),
PLOTSCENARIOS = c("spiky","near-normal","flat-top",
"skew","bimodal")) +
facet_wrap(~ scenario, nrow = 2)
Warning: Removed 164 rows containing missing values (geom_path).
sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.4
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] knitr_1.15.1 RColorBrewer_1.1-2 mixfdr_1.0
[4] REBayes_0.73 Matrix_1.2-8 dscr_0.1.1
[7] ggplot2_2.2.1 ashr_2.1-7 magrittr_1.5
[10] dplyr_0.5.0 plyr_1.8.4
loaded via a namespace (and not attached):
[1] Rcpp_0.12.10 iterators_1.0.8 tools_3.3.2
[4] digest_0.6.12 evaluate_0.10 tibble_1.2
[7] gtable_0.2.0 lattice_0.20-34 foreach_1.4.3
[10] shiny_1.0.0 DBI_0.5-1 yaml_2.1.14
[13] parallel_3.3.2 stringr_1.2.0 rprojroot_1.2
[16] grid_3.3.2 R6_2.2.0 rmarkdown_1.3
[19] reshape2_1.4.2 backports_1.0.5 scales_0.4.1
[22] codetools_0.2-15 htmltools_0.3.5 MASS_7.3-45
[25] assertthat_0.1 mime_0.5 colorspace_1.3-2
[28] xtable_1.8-2 httpuv_1.3.3 labeling_0.3
[31] stringi_1.1.2 Rmosek_7.1.3 lazyeval_0.2.0
[34] pscl_1.4.9 doParallel_1.0.10 munsell_0.4.3
[37] truncnorm_1.0-7 SQUAREM_2016.8-2