UCell signature enrichment analysis
In this demo, we will apply UCell to evaluate gene signatures in single-cell dataset from 31 tumor biopsies of melanoma patients, sequenced using the Smart-seq2 protocol Jerby-Arnon et al. (2018) Cell.
Installation
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
::install("UCell")
BiocManagerlibrary(UCell)
set.seed(123)
scRNA-seq data preparation
Download the gene expression matrix for this study from Gene Expression Omnibus - GSE115978
library(GEOquery)
<- "demodata.rds"
cached.object
if (!file.exists(cached.object)) {
<- "GSE115978"
geo_acc options(timeout = max(1000, getOption("timeout")))
<- getGEO(geo_acc)
gse getGEOSuppFiles(geo_acc)
<- read.csv(sprintf("%s/GSE115978_counts.csv.gz", geo_acc), header = T,
exp.mat row.names = 1, sep = ",")
saveRDS(exp.mat, cached.object)
else {
} <- readRDS(cached.object)
exp.mat }
Define gene signatures
Here we define some simple gene sets based on the “Human Cell Landscape” signatures Han et al. (2020) Nature. You may edit existing signatures, or add new one as elements in a list.
<- list(Immune = c("PTPRC"), Macrophage = c("CTSB", "C1QB", "LAPTM5",
signatures "TYROBP", "PSAP", "C1QA", "HLA-DRA", "CTSD", "NPC2", "FCER1G"), Tcell = c("CD3D",
"CD3E", "CD3G", "CD2"), Bcell = c("MS4A1", "CD79A", "CD79B", "CD19", "BANK1"),
Myeloid_cell = c("CD14", "LYZ", "CSF1R", "FCER1G", "SPI1", "LCK-"), Stromal = c("MMP2",
"COL1A1", "COL1A2", "COL3A1", "LUM", "DCN"))
Run UCell
Run ScoreSignatures_UCell
and get directly signature
scores for all cells
<- ScoreSignatures_UCell(exp.mat, features = signatures)
u.scores 1:8, 1:2] u.scores[
Immune_UCell Macrophage_UCell
cy78_CD45_neg_1_B04_S496_comb 0 0.3286333
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb 0 0.2744000
CY88_5_B10_S694_comb 0 0.2673333
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb 0 0.3904667
cy78_CD45_neg_3_H06_S762_comb 0 0.3680333
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb 0 0.3810000
cy79_p1_CD45_neg_PDL1_neg_AS_C4_R1_D09_S141_comb 0 0.4197000
CY88_3_D02_S614_comb 0 0.3159667
Show the distribution of predicted scores
library(reshape2)
library(ggplot2)
<- reshape2::melt(u.scores)
melted colnames(melted) <- c("Cell", "Signature", "UCell_score")
<- ggplot(melted, aes(x = Signature, y = UCell_score)) + geom_violin(aes(fill = Signature),
p scale = "width") + geom_boxplot(width = 0.1, outlier.size = 0) + theme_bw() +
theme(axis.text.x = element_blank())
p
Pre-calculating gene rankings
The time- and memory-demanding step in UCell is the calculation of
gene rankings for each individual cell. If we plan to experiment with
signatures, editing them or adding new cell subtypes, it is possible to
pre-calculate the gene rankings once and for all and then apply new
signatures over these pre-calculated ranks. Run the
StoreRankings_UCell
function to pre-calculate gene rankings
over a dataset:
set.seed(123)
<- StoreRankings_UCell(exp.mat)
ranks 1:5, 1:5] ranks[
5 x 5 sparse Matrix of class "dgCMatrix"
cy78_CD45_neg_1_B04_S496_comb cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb
C9orf152 . .
RPS11 91.5 1498.5
ELMO2 . .
CREB3L1 . .
PNMA1 942.5 .
CY88_5_B10_S694_comb cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb
C9orf152 . .
RPS11 183.5 1000.0
ELMO2 . 758.5
CREB3L1 . .
PNMA1 . .
cy78_CD45_neg_3_H06_S762_comb
C9orf152 .
RPS11 122.5
ELMO2 .
CREB3L1 .
PNMA1 649.5
Then, we can apply our signature set, or any other new signature to the pre-calculated ranks. The calculations will be considerably faster.
set.seed(123)
.2 <- ScoreSignatures_UCell(features = signatures, precalc.ranks = ranks)
u.scores
<- reshape2::melt(u.scores.2)
melted colnames(melted) <- c("Cell", "Signature", "UCell_score")
<- ggplot(melted, aes(x = Signature, y = UCell_score)) + geom_violin(aes(fill = Signature),
p scale = "width") + geom_boxplot(width = 0.1, outlier.size = 0) + theme_bw() +
theme(axis.text.x = element_blank())
p
<- list(Mast.cell = c("TPSAB1", "TPSB2", "CPA3", "SRGN", "RGS2", "RGS1",
new.signatures "NFKBIA", "GLUL", "VIM", "ANXA1"), Erythroid.cell = c("HBA2", "HBG2", "HBA1",
"HBB", "HBG1", "AHSP", "ALAS2", "SLC25A37", "HBM"))
.3 <- ScoreSignatures_UCell(features = new.signatures, precalc.ranks = ranks)
u.scores<- reshape2::melt(u.scores.3)
melted colnames(melted) <- c("Cell", "Signature", "UCell_score")
<- ggplot(melted, aes(x = Signature, y = UCell_score)) + geom_violin(aes(fill = Signature),
p scale = "width") + geom_boxplot(width = 0.1, outlier.size = 0) + theme_bw() +
theme(axis.text.x = element_blank())
p
Multi-core processing
If your machine has multi-core capabilities and enough RAM, running UCell in parallel can speed up considerably your analysis. In this example we will use 4 parallel cores:
<- ScoreSignatures_UCell(exp.mat, features = signatures, ncores = 4)
u.scores
<- reshape2::melt(u.scores)
melted colnames(melted) <- c("Cell", "Signature", "Uscore")
<- ggplot(melted, aes(x = Signature, y = Uscore)) + geom_violin(aes(fill = Signature),
p scale = "width") + geom_boxplot(width = 0.1, outlier.size = 0) + theme_bw() +
theme(axis.text.x = element_blank())
p
Interacting with SingleCellExperiment or Seurat
UCell + SingleCellExperiment
The function ScoreSignatures_UCell()
allows operating
directly on sce
objects. UCell scores are returned in a
altExp object (altExp(sce, 'UCell'
))
library(SingleCellExperiment)
<- SingleCellExperiment(list(counts = exp.mat))
sce <- ScoreSignatures_UCell(sce, features = signatures, assay = "counts", name = NULL,
sce ncores = 4)
altExp(sce, "UCell")
class: SummarizedExperiment
dim: 6 7186
metadata(0):
assays(1): UCell
rownames(6): Immune Macrophage ... Myeloid_cell Stromal
rowData names(0):
colnames(7186): cy78_CD45_neg_1_B04_S496_comb
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb ...
CY75_1_CD45_CD8_8__S289_comb_BCD8 CY75_1_CD45_CD8_8__S351_comb_BCD8
colData names(0):
Dimensionality reduction and visualization
library(scater)
library(patchwork)
# PCA
<- logNormCounts(sce)
sce <- runPCA(sce, scale = T, ncomponents = 20)
sce
# UMAP
set.seed(1234)
<- runUMAP(sce, dimred = "PCA") sce
Visualize UCell scores on low-dimensional representation (UMAP)
<- lapply(names(signatures), function(x) {
pll plotUMAP(sce, colour_by = x, by_exprs_values = "UCell", text_size = 10)
})wrap_plots(pll)
UCell + Seurat
The function AddModuleScore_UCell()
allows operating
directly on Seurat objects. UCell scores are returned as metadata
columns in the Seurat object. To see how this function differs from
Seurat’s own AddModuleScore()
(not based on per-cell ranks)
see this
vignette
library(Seurat)
<- CreateSeuratObject(counts = exp.mat, project = "JerbyArnon")
seurat.object <- AddModuleScore_UCell(seurat.object, features = signatures, name = NULL,
seurat.object ncores = 4)
head(seurat.object@meta.data)
orig.ident nCount_RNA
cy78_CD45_neg_1_B04_S496_comb cy78 357919
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb cy79 5727
CY88_5_B10_S694_comb CY88 139218
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb cy79 73996
cy78_CD45_neg_3_H06_S762_comb cy78 380341
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb cy79 92485
nFeature_RNA Immune Macrophage
cy78_CD45_neg_1_B04_S496_comb 8094 0 0.3286333
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb 2022 0 0.2744000
CY88_5_B10_S694_comb 5141 0 0.2673333
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb 5517 0 0.3904667
cy78_CD45_neg_3_H06_S762_comb 7216 0 0.3680333
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb 6822 0 0.3810000
Tcell Bcell Myeloid_cell
cy78_CD45_neg_1_B04_S496_comb 0 0 0.0000
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb 0 0 0.0000
CY88_5_B10_S694_comb 0 0 0.0408
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb 0 0 0.0000
cy78_CD45_neg_3_H06_S762_comb 0 0 0.0000
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb 0 0 0.0000
Stromal
cy78_CD45_neg_1_B04_S496_comb 0.00000000
cy79_p4_CD45_neg_PDL1_neg_E11_S1115_comb 0.00000000
CY88_5_B10_S694_comb 0.00000000
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_F07_S67_comb 0.00000000
cy78_CD45_neg_3_H06_S762_comb 0.08355556
cy79_p1_CD45_neg_PDL1_pos_AS_C1_R1_G01_S73_comb 0.00000000
Generate PCA and UMAP embeddings
<- NormalizeData(seurat.object)
seurat.object <- FindVariableFeatures(seurat.object, selection.method = "vst", nfeatures = 500)
seurat.object
<- ScaleData(seurat.object)
seurat.object <- RunPCA(seurat.object, features = seurat.object@assays$RNA@var.features,
seurat.object npcs = 20)
<- RunUMAP(seurat.object, reduction = "pca", dims = 1:20, seed.use = 123) seurat.object
Visualize UCell scores on low-dimensional representation (UMAP)
FeaturePlot(seurat.object, reduction = "umap", features = names(signatures), ncol = 3,
order = T)
Further reading
To see how UCell can interact directly with Seurat to analyse human PBMCs see THIS VIGNETTE
The code and the package are available at the UCell GitHub repository; more demos available at UCell demo repository