Read config.yml for the locations of sample sheet, pipeline output and cut sites coordinates.

Reading Config File:

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
suppressMessages(suppressWarnings(library(knitr)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(ggrepel)))
suppressMessages(suppressWarnings(library(data.table)))
suppressMessages(suppressWarnings(library(rtracklayer)))
suppressMessages(suppressWarnings(library(plotly)))
suppressMessages(suppressWarnings(library(GenomicRanges)))
suppressMessages(suppressWarnings(library(parallel)))
suppressMessages(suppressWarnings(library(pbapply)))
suppressMessages(suppressWarnings(library(DT)))

targetName <- params$target_name
config <- yaml::read_yaml('./config.yml')
sampleSheet <- data.table::fread(config$sample_sheet)
pipeline_output_dir <- config$pipeline_output_dir
cut_sites <- rtracklayer::import.bed(config$cut_sites_file)
sampleComparisons <- data.table::fread(config$comparisons_file)

Declare some common functions

importSampleBigWig <- function(pipeline_output_dir, samples, suffix = '.alnCoverage.bigwig') {
  sapply(simplify = F, USE.NAMES = T, 
                      X = unique(as.character(samples)), 
                      FUN = function(s) {
  f <- file.path(pipeline_output_dir, 'indels', s, paste0(s, suffix))
  if(file.exists(f)) {
    rtracklayer::import.bw(f, as = 'RleList')
  } else {
    stop("Can't find bigwig file for sample: ",s," at: ",
         "\n",f,"\n")
  }})
}

subsetRleListByRange <- function(input.rle, input.gr) {
  as.vector(input.rle[[seqnames(input.gr)]])[start(input.gr):end(input.gr)]
}


getReadsWithIndels <- function(pipeline_output_dir, samples) {
  readsWithIndels <- lapply(samples, function(sample) {
  dt <- data.table::fread(file.path(pipeline_output_dir, 
                                                 'indels',
                                                 sample, 
                                                 paste0(sample, ".reads_with_indels.tsv")))
  })
  names(readsWithIndels) <- samples
  return(readsWithIndels)
}

getIndels <- function(pipeline_output_dir, samples) {
  indels <- sapply(simplify = FALSE, samples, function(s) {
  f <- file.path(pipeline_output_dir, 
                 'indels',
                 s, 
                 paste0(s, ".indels.tsv"))
  if(file.exists(f)) {
    dt <- data.table::fread(f)
    dt$sample <- s
    return(dt)
  } else {
    stop("Can't open indels.tsv file for sample",s,
            "at",f,"\n")
  }})
  return(indels)
}

Subset sample sheet for those that match the target region of interest

targetName <- params$target_name
sampleSheet <- sampleSheet[target_name == targetName]

Read bigwig files for the samples in the sample sheet

# alignment coverage 
alnCoverage <- importSampleBigWig(pipeline_output_dir, 
                                  sampleSheet$sample_name, ".alnCoverage.bigwig")

#per-base ratio of the number of reads with an insertion over the coverage
insertionScores <- importSampleBigWig(pipeline_output_dir, 
                                  sampleSheet$sample_name, ".insertionScores.bigwig")

#per-base ratio of the number of reads with a deletion over the coverage
deletionScores <- importSampleBigWig(pipeline_output_dir, 
                                  sampleSheet$sample_name, ".deletionScores.bigwig")

For each target region of interest, find the samples and for each sample plot coverage, insertion, and deletion profiles within the given window (target region: e.g. amplicon boundaries)

plots <- sapply(simplify = F, USE.NAMES = T, 
                X = unique(sampleSheet$target_name), FUN = function(t) {
                  sapply(simplify = F, USE.NAMES = T, 
                         X = unique(sampleSheet[target_name == t]$sample_name), 
                         FUN = function(s) {
  target_region <- as(sampleSheet[target_name == t & sample_name == s]$target_region, 'GRanges')

  #combine coverage, insertion, deletion scores
  #subset rlelist by target region
  dt <- data.table::data.table(
    'coverage' = subsetRleListByRange(alnCoverage[[s]], target_region), 
    'percent_with_insertion' = round(subsetRleListByRange(insertionScores[[s]], target_region) * 100, 2), #percent 
    'percent_with_deletion' = round(subsetRleListByRange(deletionScores[[s]], target_region) * 100, 2) #percent
  )
  dt$bp <- start(target_region):end(target_region)
  mdt <- reshape2::melt(dt, id.vars = 'bp')
  p <- ggplot2::ggplot(mdt, aes(x = bp, y = value, group = 'variable')) + 
    geom_line(aes(color = variable)) + facet_wrap(~ variable, scales= 'free_y', nrow = 3) + 
    labs(title = paste(t, target_region), x = paste0("Position at chr:",seqnames(target_region)), y = '')
  
  #get sample-specific cut sites at the target region
  sgRNAs <- unlist(strsplit(x = sampleSheet[sampleSheet$sample_name == s,]$sgRNA_ids, 
                     split = ':'))
  cs <- subsetByOverlaps(cut_sites[cut_sites$name %in% sgRNAs], target_region, ignore.strand = TRUE)
  
  #if there is one or more sample-specific cut sites, plot them 
  if(length(cs) > 0) {
    p <- p + geom_vline(data = as.data.frame(cs), aes(xintercept = start, 
                                           color = name), linetype = 'dotted')
  }

  return(p)
 })
})

1 Plots

out = NULL
for (target_name in names(plots)) {
  out = c(out, knitr::knit_expand(text='## {{target_name}} \n\n'))
  for(sample_name in names(plots[[target_name]])) {
    p <- ggplotly(plots[[target_name]][[sample_name]], height = 600)
    out = c(out, knitr::knit_expand(text='### {{sample_name}} \n\n {{p}} \n\n'))
  }}

1.1 sqt-2_ups

1.1.1 phen2_sqt-2_ups_N2ctrl

1.1.2 phen2_sqt-2_ups_sg1sg2sg3___F2

1.1.3 phen2_sqt-2_TATA_INR_sg1sg2___F2

1.1.4 phen2_picked_sqt-2_ups_16C_rol

1.1.5 phen2_picked_sqt-2_ups_24C_nonrol