Plot local false sign rate results

Last updated: 2017-01-15

Code version: b05decc05714ddcb20aa04e56b557d5123d178fb

First, we load the necessary libraries and other useful function definitions.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(reshape2)
library(ggplot2)
source("../R/set_plot_colors.R")

Load the results of the simulation experiments generated by code/dsc-shrink/run_dsc.R, and prepare the posterior statistics for plotting.

load("../output/dsc-shrink-files/res.RData")

#' @param df dataframe of scores for many methods/scenrios etc
#' @return tall dataframe with columns of scores for each method and the "goldmethod" against which plot is to be made
process_score_for_plotting_against_gold = 
  function(df, 
           PLOTMETHODS   = c("ash.n","ash.u","ash.hu"),
           GOLDMETHOD    = "bayes",PLOTSEEDS = 1:100,
           PLOTSCENARIOS = c("spiky","near-normal","flat-top","skew",
                             "big-normal","bimodal"),
           PLOTNAMES     = PLOTSCENARIOS) {
  df %<>% filter(seed %in% PLOTSEEDS) %>% 
    filter(scenario %in% PLOTSCENARIOS) %>% 
    filter(method %in% c(PLOTMETHODS,GOLDMETHOD))
  df$scenario = factor(df$scenario,levels = PLOTSCENARIOS)
  levels(df$scenario) = PLOTNAMES
  
  # Create "tall"" version of dataframe.
  df %<>% select(-user.self,-sys.self,-elapsed,-user.child,-sys.child) %>%
    melt(id.vars = c("method","scenario","seed",".id"),value.name = "val")

  #separate bayes and remainder
  df.bayes = df %>% filter(method == GOLDMETHOD)
  df.rest  = df %>% filter(method != GOLDMETHOD)

  # Join bayes with others, so each line has both the bayes and the 
  # non-bayes version.
  return(inner_join(df.bayes,df.rest,by = c("scenario","seed","variable")))
}

plot_lfsr = function(lfsr,xlab = "True lfsr",ylab = "Estimated lfsr",
                     xlim = c(0,0.2),ylim = c(0,0.2),
                     legend.position = "bottom")
  ggplot(lfsr,aes(val.x,val.y,colour = method.y)) +
    facet_grid(. ~ scenario) +
    guides(alpha = FALSE) +
    geom_abline(colour = "black") +
    geom_abline(colour = "red",slope = 2) +
    xlab(xlab) + ylab(ylab) +
    geom_point(shape = 1,size = 0.1,alpha = 0.2) +
    scale_y_continuous(limits = ylim) +
    scale_x_continuous(limits = xlim)

lfsr = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
                                               PLOTMETHODS = "ash.n")
lfdr = process_score_for_plotting_against_gold(res$lfdr,PLOTSEEDS = 1:100,
                                               PLOTMETHODS = "ash.n")

p1 = plot_lfsr(lfsr,ylim = c(0,1),xlim = c(0,0.2))
p2 = plot_lfsr(lfdr,ylim = c(0,1),xlim = c(0,0.2),
               xlab = "True lfdr",ylab = "Estimated lfdr")

Separately for each model, create a scatterplot comparing the estimated LFSR against the “gold-standard” LFSR.

print(p1 + theme(legend.position = "none",
                 axis.text.x = element_text(size = 8,angle = 45)) +
      coord_equal(ratio = 1/5) + colScale)

Warning: Removed 418746 rows containing missing values (geom_point).

Separately for each model, create a scatterplot comparing the estimated LFDR against the “gold-standard” LFDR.

print(p2 + theme(legend.position = "none",
                 axis.text.x = element_text(size = 8,angle = 45)) +
      coord_equal(ratio = 1/5) + colScale)

Warning: Removed 358634 rows containing missing values (geom_point).

Separately for each of the ash.n.s methods, create a scatterplot comparing the estimated LFSR against the “gold-standard” LFSR.

lfsr.s = process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
                                                 PLOTMETHODS = "ash.n.s")
p1.s   = plot_lfsr(lfsr.s,ylim = c(0,1),xlim = c(0,0.2))
print(p1.s + theme(legend.position = "none",
                   axis.text.x = element_text(size = 8,angle = 45)) +
      coord_equal(ratio = 1/5))

Warning: Removed 418746 rows containing missing values (geom_point).

Separately for each of the ash.n.s methods with the -nn data sets, create a scatterplot comparing the estimated LFSR against the “gold-standard” LFSR.

lfsr.s.nn = 
  process_score_for_plotting_against_gold(res$lfsr,PLOTSEEDS = 1:100,
    PLOTMETHODS = "ash.n.s",
    PLOTSCENARIOS = paste0(c("spiky","near-normal","flat-top","skew",
                             "big-normal","bimodal"),"-nn"))
p1.s.nn = plot_lfsr(lfsr.s.nn,ylim = c(0,1),xlim = c(0,0.2))
print(p1.s.nn + theme(legend.position = "none",
                      axis.text.x = element_text(size = 8,angle = 45)) +
      coord_equal(ratio = 1/5))

Warning: Removed 251083 rows containing missing values (geom_point).

Session information.

print(sessionInfo())

R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] knitr_1.15.1       RColorBrewer_1.1-2 ggplot2_2.2.0     
[4] reshape2_1.4.2     dplyr_0.5.0       

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.8      magrittr_1.5     munsell_0.4.3    xtable_1.8-2    
 [5] colorspace_1.2-6 R6_2.2.0         stringr_1.1.0    plyr_1.8.4      
 [9] tools_3.3.2      grid_3.3.2       gtable_0.2.0     DBI_0.5-1       
[13] dscr_0.1.1       htmltools_0.3.5  yaml_2.1.14      lazyeval_0.2.0  
[17] assertthat_0.1   rprojroot_1.1    digest_0.6.10    tibble_1.2      
[21] shiny_0.14.2     mime_0.5         evaluate_0.10    rmarkdown_1.3   
[25] labeling_0.3     stringi_1.1.2    scales_0.4.1     backports_1.0.4 
[29] httpuv_1.3.3

Plot local false sign rate results

Matthew Stephens

2015-10-26

Session information.