Various examples of cdf plots

Last updated: 2017-04-18

Code version: b3eafbf4187bdce919f7b35ef7505fa7a3095c8c

First, we load the necessary libraries and useful function definitions.

library(plyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(magrittr)
library(ashr)
library(ggplot2)
library(dscr)
library(REBayes)

## Loading required package: Matrix

library(ashr)
source("../code/dsc-shrink/methods/mixfdr.wrapper.R")
source("../R/set_plot_colors.R")

Define a couple functions that compile CDF data in such a way that it is easy to plot the CDFs using ggplot2.

load("../output/dsc-shrink-files/res.RData")

# df is a list with components method,seed, scenario
# cdf is evaluated at x
# returns list of x,y,pi0, where cdf values are in y
get_cdf = function (df,dsc = dsc_shrink,x = seq(-6,6,length = 100),
                    homedir = "../code/dsc-shrink") {
  m = df$method
  if (m == "truth")
    m = "ash.n"
  temp = load_example(dsc,df$seed,df$scenario,m,homedir)
  pi0  = temp$meta$pi0
  
  if (df$method == "truth") {
    s    = dsc$scenarios[[df$scenario]]
    galt = s$args$g
    g    = normalmix(c(pi0,(1 - pi0) * mixprop(galt)),
                     c(0,comp_mean(galt)),
                     c(0,comp_sd(galt)))
  } else {
    if (grepl("mixfdr",df$method)) {
      temp$output$fitted_g = mixfdr2fitted.g(temp$output)$fitted.g
    }
    g = temp$output$fitted_g
  }
  return(data.frame(x = x,y = as.numeric(mixcdf(g,x)),pi0 = pi0))
}

plot_mean_cdf = 
  function(SEEDS,
           PLOTMETHODS   = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
           PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
           pi0filter     = FALSE,...) {
  PLOTNAMES = PLOTSCENARIOS
  
  #set up dataframe with cdf for all methods and all datasets
  df     = expand.grid(seed = SEEDS,scenario = PLOTSCENARIOS,
                       method = PLOTMETHODS,stringsAsFactors = FALSE)
  df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
  
  if (pi0filter)
    df.cdf %<>% filter(pi0 < 0.55 & pi0 > 0.45)
  if (length(SEEDS) > 1)
    df.cdf %<>% group_by(x,method,scenario) %>% summarise(y = mean(y))
  
  df.cdf$scenario         = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
  levels(df.cdf$scenario) = PLOTNAMES

  return(ggplot(df.cdf,aes(x = x,y = y,color = method),...) + colScale + 
    geom_line(lwd = 1.5,alpha = 0.7) + facet_grid(.~scenario) + 
      theme(legend.position = "bottom"))
}

Show the CDFs for all methods and all scenarios, based on a single data set.

# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
plot_mean_cdf(1)

Show the CDFs for all scenarios, this time averaged over 100 simulated data sets.

# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","ash.u","ash.hu","truth"),
              pi0filter = TRUE)

Same as above, but with custom colours.

# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=9, crop=TRUE
names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue",
                     "locfdr","mixfdr.tnull")
colScale <- scale_colour_manual(name = "method",values = myColors)
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","ash.u.s","ash.hu.s","truth"),
              pi0filter = TRUE)

# Reset color scale for other plots.
source("../R/set_plot_colors.R")

The following plots have fewer methods and scenarios for more clarity

# These chunk options were used to create the figure for the paper:
# fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
plot_mean_cdf(1,PLOTMETHODS = c("ash.n","truth"),
              PLOTSCENARIOS = c("spiky","near-normal","bimodal"))

Same as above, but this time the CDFs are averaged over 100 simulated data sets.

# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=3, fig.width=6, crop=TRUE
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n","truth"),
              PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
              pi0filter = TRUE)

Now with custom colours.

# These chunk options were used to create the figure for the paper:
# fig.height=3, fig.width=6, crop=TRUE, dev='pdf'
names(myColors) <- c("truth","ash.hu.s","ash.n.s","ash.u.s","qvalue","locfdr",
                     "mixfdr.tnull")
colScale        <- scale_colour_manual(name = "method",values = myColors)
plot_mean_cdf(1:100,PLOTMETHODS = c("ash.n.s","truth"),
              PLOTSCENARIOS = c("spiky","near-normal","bimodal"),
              pi0filter = TRUE)

source("../R/set_plot_colors.R")

I wanted to add the npmle method (fit using the GLmix function) to the plot…

# These chunk options were used to create the figure for the paper:
# dev='pdf', fig.height=4.5, fig.width=6, crop=TRUE
run_nplme = function (SEED = 1,PLOTSCENARIOS = c("spiky","near-normal",
                                                 "flat-top","skew","bimodal")) {
  df = data.frame(seed = NULL,scenario = NULL,method = NULL,x = NULL,
                  y = NULL,pi0 = NULL)
  for (SCENARIO in PLOTSCENARIOS) {
    temp = load_example(dsc_shrink,SEED,SCENARIO,"ash.n","../output/dsc-shrink-files/")
    z    = GLmix(temp$input$betahat)
    df   = rbind(df,data.frame(seed = SEED,scenario = SCENARIO,method = "NPMLE",
                               x = z$x,y = cumsum(z$y)/sum(z$y),pi0 = NA))
  }
  return(df)
}

plot_mean_cdf_with_npmle = 
  function(SEED          = 1,
           PLOTMETHODS   = c("ash.n","ash.u","ash.hu","truth","mixfdr.tnull"),
           PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew","bimodal"),
           pi0filter     = FALSE,...) {
  if (length(SEED) > 1)
    stop("plot with npmle only implemented for a single seed")
  PLOTNAMES = PLOTSCENARIOS
  
  # Set up dataframe with cdf for all methods and all datasets.
  df = expand.grid(seed = SEED,scenario = PLOTSCENARIOS,method = PLOTMETHODS,
                   stringsAsFactors = FALSE)
  df.cdf = ddply(df,.(seed,scenario,method),get_cdf)
  
  df.cdf$scenario         = factor(df.cdf$scenario,levels = PLOTSCENARIOS)
  levels(df.cdf$scenario) = PLOTNAMES
  
  df.npmle = run_nplme(SEED,PLOTSCENARIOS)
  df.cdf   = rbind(df.cdf,df.npmle)
  
  p = ggplot(df.cdf,aes(x = x,y = y,color = method),...) + 
    xlim(c(-4,4)) + colScale + geom_line(lwd = 1.2,alpha = 0.7) + 
    theme(legend.position = "bottom")
  return(p)
}

plot_mean_cdf_with_npmle(1,PLOTMETHODS = c("ash.hu","truth"),
                         PLOTSCENARIOS = c("spiky","near-normal","flat-top",
                                           "skew","bimodal")) +
  facet_wrap(~ scenario, nrow = 2)

Warning: Removed 164 rows containing missing values (geom_path).

Session information

sessionInfo()

R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.4

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] knitr_1.15.1       RColorBrewer_1.1-2 mixfdr_1.0        
 [4] REBayes_0.73       Matrix_1.2-8       dscr_0.1.1        
 [7] ggplot2_2.2.1      ashr_2.1-7         magrittr_1.5      
[10] dplyr_0.5.0        plyr_1.8.4        

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10      iterators_1.0.8   tools_3.3.2      
 [4] digest_0.6.12     evaluate_0.10     tibble_1.2       
 [7] gtable_0.2.0      lattice_0.20-34   foreach_1.4.3    
[10] shiny_1.0.0       DBI_0.5-1         yaml_2.1.14      
[13] parallel_3.3.2    stringr_1.2.0     rprojroot_1.2    
[16] grid_3.3.2        R6_2.2.0          rmarkdown_1.3    
[19] reshape2_1.4.2    backports_1.0.5   scales_0.4.1     
[22] codetools_0.2-15  htmltools_0.3.5   MASS_7.3-45      
[25] assertthat_0.1    mime_0.5          colorspace_1.3-2 
[28] xtable_1.8-2      httpuv_1.3.3      labeling_0.3     
[31] stringi_1.1.2     Rmosek_7.1.3      lazyeval_0.2.0   
[34] pscl_1.4.9        doParallel_1.0.10 munsell_0.4.3    
[37] truncnorm_1.0-7   SQUAREM_2016.8-2

Various examples of cdf plots

Matthew Stephens

2015-10-26

Session information