Read config.yml for the locations of sample sheet, pipeline output and cut sites coordinates.
Reading Config File:
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
suppressMessages(suppressWarnings(library(knitr)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(ggrepel)))
suppressMessages(suppressWarnings(library(data.table)))
suppressMessages(suppressWarnings(library(rtracklayer)))
suppressMessages(suppressWarnings(library(plotly)))
suppressMessages(suppressWarnings(library(GenomicRanges)))
suppressMessages(suppressWarnings(library(parallel)))
suppressMessages(suppressWarnings(library(pbapply)))
suppressMessages(suppressWarnings(library(DT)))
targetName <- params$target_name
config <- yaml::read_yaml('./config.yml')
sampleSheet <- data.table::fread(config$sample_sheet)
pipeline_output_dir <- config$pipeline_output_dir
cut_sites <- rtracklayer::import.bed(config$cut_sites_file)
sampleComparisons <- data.table::fread(config$comparisons_file)
Declare some common functions
importSampleBigWig <- function(pipeline_output_dir, samples, suffix = '.alnCoverage.bigwig') {
sapply(simplify = F, USE.NAMES = T,
X = unique(as.character(samples)),
FUN = function(s) {
f <- file.path(pipeline_output_dir, 'indels', s, paste0(s, suffix))
if(file.exists(f)) {
rtracklayer::import.bw(f, as = 'RleList')
} else {
stop("Can't find bigwig file for sample: ",s," at: ",
"\n",f,"\n")
}})
}
subsetRleListByRange <- function(input.rle, input.gr) {
as.vector(input.rle[[seqnames(input.gr)]])[start(input.gr):end(input.gr)]
}
getReadsWithIndels <- function(pipeline_output_dir, samples) {
readsWithIndels <- lapply(samples, function(sample) {
dt <- data.table::fread(file.path(pipeline_output_dir,
'indels',
sample,
paste0(sample, ".reads_with_indels.tsv")))
})
names(readsWithIndels) <- samples
return(readsWithIndels)
}
getIndels <- function(pipeline_output_dir, samples) {
indels <- sapply(simplify = FALSE, samples, function(s) {
f <- file.path(pipeline_output_dir,
'indels',
s,
paste0(s, ".indels.tsv"))
if(file.exists(f)) {
dt <- data.table::fread(f)
dt$sample <- s
return(dt)
} else {
stop("Can't open indels.tsv file for sample",s,
"at",f,"\n")
}})
return(indels)
}
Subset sample sheet for those that match the target region of interest
targetName <- params$target_name
sampleSheet <- sampleSheet[target_name == targetName]
cutSiteStats <- as.data.table(do.call(rbind, lapply(sampleSheet$sample_name, function(sampleName) {
f <- file.path(pipeline_output_dir, 'indels', sampleName, paste0(sampleName, '.sgRNA_efficiency.tsv'))
if(file.exists(f)) {
dt <- fread(f)
return(dt)
}
})))
sampleGuides <- lapply(sampleSheet$sample_name, function(s) {
sgRNAs <- unlist(strsplit(x = sampleSheet[sampleSheet$sample_name == s,]$sgRNA_ids,
split = ':'))
if(sgRNAs[1] == 'none') {
sgRNAs <- setdiff(unique(unlist(strsplit(x = sampleSheet[target_name == targetName,]$sgRNA_ids, split = ':'))), 'none')
}
return(sgRNAs)
})
names(sampleGuides) <- as.character(sampleSheet$sample_name)
cutSiteStats$sampleMatchesGuide <- as.factor(apply(cutSiteStats, 1, function(x) {
s <- as.character(x[['sample']])
g <- as.character(x[['sgRNA']])
return(g %in% sampleGuides[[s]])
}))
Guide RNA efficiencies across samples as a table
dt <- cutSiteStats[sampleMatchesGuide == TRUE][,-4][,c(1,3,2)]
colnames(dt)[3] <- 'percentEfficiency'
DT::datatable(dt,
extensions = c('Buttons', 'FixedColumns', 'Scroller'),
options = list(fixedColumns = TRUE,
scroller = TRUE,
scrollY = 400,
dom = 'Bfrtip',
buttons = c('copy', 'print', 'csv','excel', 'pdf')),
filter = 'bottom'
)
Guide RNA efficiencies across samples displayed as a heatmap
dt <- dcast(cutSiteStats[sampleMatchesGuide == TRUE], sample ~ sgRNA, value.var = 'scores')
M <- as.matrix(dt[,-1])
rownames(M) <- dt$sample
pheatmap::pheatmap(t(M), cluster_rows = F, cluster_cols = F,
na_col = 'black',
display_numbers = ncol(M) < 15 & nrow(M) < 15,
number_color = 'black',
main = '% Indel Efficiency Values at Cut Sites (+/- 5bp)')
Compare guide efficiencies across samples
ggplot(cutSiteStats[sampleMatchesGuide == TRUE],
aes(x = sgRNA, y = scores)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(height = 0) +
coord_flip() +
labs(y = 'percent sgRNA efficiencies \n each dot represents a sample')