Read config.yml for the locations of sample sheet, pipeline output and cut sites coordinates.

Reading Config File:

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
suppressMessages(suppressWarnings(library(knitr)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(ggrepel)))
suppressMessages(suppressWarnings(library(data.table)))
suppressMessages(suppressWarnings(library(rtracklayer)))
suppressMessages(suppressWarnings(library(plotly)))
suppressMessages(suppressWarnings(library(GenomicRanges)))
suppressMessages(suppressWarnings(library(parallel)))
suppressMessages(suppressWarnings(library(pbapply)))
suppressMessages(suppressWarnings(library(DT)))

targetName <- params$target_name
config <- yaml::read_yaml('./config.yml')
sampleSheet <- data.table::fread(config$sample_sheet)
pipeline_output_dir <- config$pipeline_output_dir
cut_sites <- rtracklayer::import.bed(config$cut_sites_file)
sampleComparisons <- data.table::fread(config$comparisons_file)

Declare some common functions

importSampleBigWig <- function(pipeline_output_dir, samples, suffix = '.alnCoverage.bigwig') {
  sapply(simplify = F, USE.NAMES = T, 
                      X = unique(as.character(samples)), 
                      FUN = function(s) {
  f <- file.path(pipeline_output_dir, 'indels', s, paste0(s, suffix))
  if(file.exists(f)) {
    rtracklayer::import.bw(f, as = 'RleList')
  } else {
    stop("Can't find bigwig file for sample: ",s," at: ",
         "\n",f,"\n")
  }})
}

subsetRleListByRange <- function(input.rle, input.gr) {
  as.vector(input.rle[[seqnames(input.gr)]])[start(input.gr):end(input.gr)]
}


getReadsWithIndels <- function(pipeline_output_dir, samples) {
  readsWithIndels <- lapply(samples, function(sample) {
  dt <- data.table::fread(file.path(pipeline_output_dir, 
                                                 'indels',
                                                 sample, 
                                                 paste0(sample, ".reads_with_indels.tsv")))
  })
  names(readsWithIndels) <- samples
  return(readsWithIndels)
}

getIndels <- function(pipeline_output_dir, samples) {
  indels <- sapply(simplify = FALSE, samples, function(s) {
  f <- file.path(pipeline_output_dir, 
                 'indels',
                 s, 
                 paste0(s, ".indels.tsv"))
  if(file.exists(f)) {
    dt <- data.table::fread(f)
    dt$sample <- s
    return(dt)
  } else {
    stop("Can't open indels.tsv file for sample",s,
            "at",f,"\n")
  }})
  return(indels)
}

Subset sample sheet for those that match the target region of interest

targetName <- params$target_name
sampleSheet <- sampleSheet[target_name == targetName]
cutSiteStats <- as.data.table(do.call(rbind, lapply(sampleSheet$sample_name, function(sampleName) {
  f <- file.path(pipeline_output_dir, 'indels', sampleName, paste0(sampleName, '.sgRNA_efficiency.tsv'))
  if(file.exists(f)) {
    dt <- fread(f)
    return(dt)
  }
})))

sampleGuides <- lapply(sampleSheet$sample_name, function(s) {
  sgRNAs <- unlist(strsplit(x = sampleSheet[sampleSheet$sample_name == s,]$sgRNA_ids, 
                            split = ':'))
  if(sgRNAs[1] == 'none') {
    sgRNAs <- setdiff(unique(unlist(strsplit(x = sampleSheet[target_name == targetName,]$sgRNA_ids, split = ':'))), 'none')
  }
  return(sgRNAs)
})
names(sampleGuides) <- as.character(sampleSheet$sample_name)

cutSiteStats$sampleMatchesGuide <- as.factor(apply(cutSiteStats, 1, function(x) {
  s <- as.character(x[['sample']])
  g <- as.character(x[['sgRNA']])
  return(g %in% sampleGuides[[s]])
}))

Guide RNA efficiencies across samples as a table

dt <- cutSiteStats[sampleMatchesGuide == TRUE][,-4][,c(1,3,2)]
colnames(dt)[3] <- 'percentEfficiency'
DT::datatable(dt, 
          extensions = c('Buttons', 'FixedColumns', 'Scroller'), 
          options = list(fixedColumns = TRUE, 
                         scroller = TRUE,
                         scrollY = 400,
                         dom = 'Bfrtip',
                         buttons = c('copy', 'print', 'csv','excel', 'pdf')),
          filter = 'bottom'
          )

Guide RNA efficiencies across samples displayed as a heatmap

dt <- dcast(cutSiteStats[sampleMatchesGuide == TRUE], sample ~ sgRNA, value.var = 'scores')
M <- as.matrix(dt[,-1])
rownames(M) <- dt$sample

pheatmap::pheatmap(t(M), cluster_rows = F, cluster_cols = F, 
                   na_col = 'black', 
                   display_numbers = ncol(M) < 15 & nrow(M) < 15, 
                   number_color = 'black', 
                   main = '% Indel Efficiency Values at Cut Sites (+/- 5bp)')

Compare guide efficiencies across samples

ggplot(cutSiteStats[sampleMatchesGuide == TRUE], 
       aes(x = sgRNA, y = scores)) +
  geom_boxplot(outlier.shape = NA) + 
  geom_jitter(height = 0) + 
  coord_flip() + 
  labs(y = 'percent sgRNA efficiencies \n each dot represents a sample')