PiGx scRNAseq
This report was generated with PiGx-scRNAseq version 0.0.1.
# Unpack the report parameters supplied at render time.
sceRdsFile <- params$sceRdsFile
workdir <- params$workdir
outFile <- params$outFile

# The covariates arrive as one comma-separated string: split it on commas,
# then strip every space character from each entry.
covariates <- unlist(strsplit(x = params$covariates, split = ','))
covariates <- gsub(' ', '', covariates)

# Descriptions and values are kept in the same order so that they line up
# row by row in the settings table below.
inputParameterDesc <- c(
  'RDS format file containing a SingleCellExperiment object',
  'Working directory',
  'Path to HTML report output',
  'Covariates to use when plotting (PCA and t-SNE)'
)
inputParameterValues <- c(
  sceRdsFile,
  workdir,
  outFile,
  paste(covariates, collapse = ', ')
)

inputSettings <- data.frame(
  parameters = inputParameterDesc,
  values = inputParameterValues,
  stringsAsFactors = FALSE
)

# Render the settings as a table; the page length equals the number of
# parameter values so that every row fits on a single page.
DT::datatable(
  data = inputSettings,
  extensions = 'FixedColumns',
  options = list(
    fixedColumns = TRUE,
    scrollX = TRUE,
    pageLength = length(inputParameterValues),
    dom = 't'
  )
)
First a pre-generated SingleCellExperiment object is read.
This object must minimally have the following assays:
- `cnts`
- `cpm` (the `cnts` object normalized into counts per million, in log2 scale)

And the following reduced dimension datasets: `PCA` and `TSNE`.
# Load the pre-generated object from the RDS file given in the report
# parameters, then print its summary into the report.
sce <- readRDS(sceRdsFile)
print(sce)
## class: SingleCellExperiment
## dim: 46603 31471
## metadata(0):
## assays(3): cnts cpm scale
## rownames: NULL
## rowData names(8): Genes ndetected ... row_variability geneName
## colnames: NULL
## colData names(12): sample_name cell_id ... mean_expr
## CellCyclePhase
## reducedDimNames(2): PCA TSNE
## spikeNames(0):
# Draw a boxplot of one column of `df` grouped by another.
#
# df:       data frame holding the columns named by `x` and `y`
# x, y:     column names, given as strings, mapped to the x and y axes
# label_x:  x-axis label
# label_y:  y-axis label
# title:    plot title
#
# Returns the ggplot object.
plotCellStats <- function(df, x, y, label_x, label_y, title) {
  # Horizontally centre the title and rotate the x tick labels by 90 degrees.
  boxTheme <- theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

  ggplot(df, aes_string(x = x, y = y)) +
    geom_boxplot() +
    labs(x = label_x, y = label_y, title = title) +
    boxTheme
}
# Labels for each per-cell statistic to plot. Every entry is named after the
# column that is plotted on the y axis and holds, in order:
# c(x-axis label, y-axis label, plot title).
plotDesc <- list(
  'nGene' = c('Sample',
              'Number of detected genes',
              'Number of detected genes per cell'),
  'max_gene' = c('Sample',
                 'Maximum gene expression per cell',
                 'Maximum gene expression'),
  'mean_expr' = c('Sample',
                  'Average gene expression',
                  'Average gene expression per cell\nfor genes with >0 UMI')
)

# Build one boxplot per statistic, grouped by 'sample_name'.
# The column data does not change between plots, so it is converted to a
# data frame once and shared, instead of being converted again for every plot.
# local() keeps the temporary data frame out of the surrounding environment.
cellStatsPlots <- local({
  cellStats <- as.data.frame(colData(sce))
  lapply(names(plotDesc), function(y) {
    plotCellStats(df = cellStats,
                  x = 'sample_name',
                  y = y,
                  label_x = plotDesc[[y]][1],
                  label_y = plotDesc[[y]][2],
                  title = plotDesc[[y]][3])
  })
})

# Name each plot after its y-axis label (second element of each description).
# vapply() guarantees exactly one string per plot.
names(cellStatsPlots) <- vapply(plotDesc,
                                function(x) x[2],
                                character(1),
                                USE.NAMES = FALSE)
The plot displays the numbers of mapped reads (in millions) for total (green bars) and uniquely-mapped UMI (orange bars) for each sample.
The plot displays the numbers of mapped reads (in millions) categorized by annotation (as indicated by the legend) for each sample.
These are boxplots of cell statistics.
Number of detected genes displays the distributions of numbers of detected genes per cell. Maximum gene expression per cell displays the distributions of maximum gene expression per cell. Average gene expression displays the distributions of average gene expression for genes with at least 1 UMI per cell.
The following plots visualize the cells in the principal component space, for the pairs of components indicated on the axes. These plots can be used to assess the data structure as well as to detect clusters, trends, outliers, and batch effects. The sample points are colored by the feature indicated in the corresponding tab title.
# Scatter plot of two reduced-dimension coordinates, optionally coloured by
# a covariate.
#
# df:        data frame holding the columns named by `dim1`, `dim2` and
#            `color_by`
# dim1:      column name (string) mapped to the x axis
# dim2:      column name (string) mapped to the y axis
# title:     plot title (default NULL)
# color_by:  column name (string) mapped to point colour (default NULL)
# gradient:  when TRUE, a diverging colour gradient scale is added
# midpoint:  midpoint of that gradient scale; only used when `gradient` is
#            TRUE. The default of 3.3 is the value that used to be
#            hard-coded here, so existing calls behave as before.
#
# Returns the ggplot object.
reducedDimPlot <- function(df, dim1, dim2, title = NULL, color_by = NULL,
                           gradient = FALSE, midpoint = 3.3) {
  p <- ggplot2::ggplot(df, aes_string(x = dim1, y = dim2)) +
    geom_point(aes_string(color = color_by), size = 0.5, alpha = 0.5) +
    labs(title = title)

  if (gradient == TRUE) {
    p <- p + scale_color_gradient2(midpoint = midpoint)
  }

  p
}
# Add a log10-transformed copy of the per-cell gene count; it is the only
# covariate drawn with a colour gradient.
colData(sce)$log10nGene <- log10(colData(sce)$nGene)
gradientList <- c('log10nGene')

# Covariates to colour the plots by. union() keeps first-seen order and drops
# repeats, so a covariate the user already listed (e.g. 'CellCyclePhase') is
# not plotted twice.
covariateList <- union(covariates, gradientList)
if ('CellCyclePhase' %in% colnames(colData(sce))) {
  covariateList <- union(covariateList, 'CellCyclePhase')
}

# Combine the per-cell metadata with the PCA coordinates. The coordinates are
# fetched through the reducedDim() accessor rather than by reading the
# object's internal slot, which is not part of the public interface.
pcaData <- as.data.table(
  cbind(colData(sce), SingleCellExperiment::reducedDim(sce, 'PCA'))
)

# For every covariate, plot PC1 against each of PC2..PC5. The result is a
# list (one entry per covariate) of lists (one plot per component pair).
pcaPlotList <- lapply(covariateList, function(cov) {
  otherPCs <- paste0('PC', 2:5)
  pL <- lapply(otherPCs, function(pc) {
    reducedDimPlot(df = pcaData,
                   dim1 = 'PC1',
                   dim2 = pc,
                   title = paste('PC1 vs', pc),
                   color_by = cov,
                   gradient = cov %in% gradientList)
  })
  names(pL) <- paste('PC1 vs', otherPCs)
  pL
})
names(pcaPlotList) <- covariateList