Evaluating human TIL subtype signatures using UCell
In this vignette we show how to use UCell gene signature scoring to identify and filter tumor-infiltrating T cell subtypes
Prepare environment
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("UCell")
library(Seurat)
library(UCell)
library(dplyr)
set.seed(123)Query single-cell data
Use data from Yost et al. Nat Med 2019 Pre-compiled R object available here
inputFile <- "Yost.pretreatment.all.rds"
if (!file.exists(inputFile)) {
download.file("https://drive.switch.ch/index.php/s/cluBLHkFFzLZWzL/download",
inputFile)
}
data.seurat <- readRDS(inputFile)
data.seurat <- data.seurat %>%
NormalizeData %>%
FindVariableFeatures %>%
ScaleData %>%
RunPCA %>%
RunUMAP(dims = 1:30)
data.seuratAn object of class Seurat
20787 features across 21328 samples within 1 assay
Active assay: RNA (20787 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
Colour by patient and original cluster annotations
DimPlot(data.seurat, group.by = "patient")DimPlot(data.seurat, group.by = "cluster")DimPlot(data.seurat, group.by = "cluster.T")Unsupervised clustering
set.seed(123)
which.red <- "pca"
resol = 0.7
ndim = 30
data.seurat <- FindNeighbors(data.seurat, reduction = which.red, dims = 1:ndim)
data.seurat <- FindClusters(data.seurat, resolution = resol)Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 21328
Number of edges: 776031
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9116
Number of communities: 26
Elapsed time: 3 seconds
DimPlot(data.seurat, reduction = "umap", group.by = "seurat_clusters", label = T) +
NoLegend()Score general cell type signatures using UCell
Apply UCell with human cell type signatures to identify major cell types These signatures were extracted from Han et al Nature 2020 and further filtered
signaturesHumanCellTypes <- readRDS("aux/signaturesHumanCellTypes.rds")
head(signaturesHumanCellTypes)$Fetal.epithelial.progenitor
1 2 3 4 5 6 7 8 9 10
"BEX3" "STMN1" "SOX4" "LDHB" "SKP1" "SNRPE" "ID3" "SRP9" "GSTP1" "SRP14"
$Macrophage
1 2 3 4 5 6 7 8
"CTSB" "C1QB" "LAPTM5" "TYROBP" "PSAP" "C1QA" "HLA-DRA" "CTSD"
9 10
"NPC2" "FCER1G"
$B.cell..Plasmocyte.
1 2 3 4 5 6 7 8
"JCHAIN" "IGHA1" "SSR4" "MZB1" "IGKC" "IGHA2" "HERPUD1" "DERL3"
9 10
"SEC11C" "FKBP11"
$Fibroblast
1 2 3 4 5 6 7 8
"C1S" "TIMP2" "COL6A3" "SEMA3C" "MMP2" "GSN" "IGFBP6" "MFAP4"
9 10
"COL6A1" "PLAC9"
$Fasciculata.cell
1 2 3 4 5 6 7 8
"PEBP1" "STAR" "RARRES2" "CLU" "CYP21A2" "CYP17A1" "AKR1B1" "NOV"
9 10
"TPD52L1" "EPHX1"
$T.cell
1 2 3 4 5 6 7 8 9
"CD3D" "CD3E" "CD3G" "CD4" "CD2" "CD7" "TRAC" "TRBC1" "LAT"
data.seurat <- AddModuleScore_UCell(data.seurat, features = signaturesHumanCellTypes,
ncores = 4)
# Some major cell types to look at:
toplot <- c("Macrophage", "Fibroblast", "T.cell", "Stromal.cell", "B.cell", "Myeloid.cell",
"Endothelial.cell.1", "NK")
featnames <- paste0(toplot, "_UCell")
FeaturePlot(data.seurat, features = featnames, pt.size = 0.1, max.cutoff = "q99",
ncol = 2)VlnPlot(data.seurat, features = featnames, pt.size = 0, split.by = "seurat_clusters",
ncol = 2)We can appreciate that some clusters have a clearly high mean distribution of T cell signature scores (T.cell_UCell score > 0.2). We can use this value as a rudimentary threshold to split T cell from non-T cell clusters.
Identify T cells based on signatures and subset them
Identify T cell clusters by UCell score
# select as Tcell clusters only those with median Uscore>0.2
medians <- sapply(levels(data.seurat$seurat_clusters), function(x) {
median(data.seurat@meta.data[data.seurat$seurat_clusters == x, "T.cell_UCell"])
})
tcell.clusters <- names(medians[medians > 0.2])
# Add metadata
data.seurat$is.Tcell <- FALSE
data.seurat@meta.data[data.seurat$seurat_clusters %in% tcell.clusters, "is.Tcell"] <- TRUE
DimPlot(data.seurat, group.by = "is.Tcell")Subset on T cells
data.seurat.tcells <- subset(data.seurat, subset = is.Tcell == TRUE)
data.seurat.tcellsAn object of class Seurat
20787 features across 14246 samples within 1 assay
Active assay: RNA (20787 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
Recalculate embeddings only for filtered T cells
ndim = 20
resol = 3
data.seurat.tcells <- data.seurat.tcells %>%
NormalizeData %>%
FindVariableFeatures %>%
ScaleData %>%
RunPCA %>%
RunUMAP(dims = 1:ndim)By patient and by annotation from original study
DimPlot(data.seurat.tcells, group.by = "patient")DimPlot(data.seurat.tcells, group.by = "cluster.T")DimPlot(data.seurat.tcells, group.by = "seurat_clusters")Score TIL subtype-specific signatures using UCell
Now we can apply UCell using signatures specific for distict T cell subtypes. TIL states signatures were obtained from ProjecTILs’ reference TIL atlas (https://github.com/carmonalab/ProjecTILs), Andreatta et al Nature Communications (2021)
signaturesHumanTILs <- readRDS("aux/signaturesHumanTILs.rds")
signaturesHumanTILs$Tfh
[1] "CD4" "CD40LG" "TOX2" "MAF" "CD200" "BATF"
$CD8_NaiveLike
[1] "CD8A" "CD8B" "CCR7" "IL7R" "SELL" "TCF7" "S1PR1" "LEF1"
$CD8_EffectorMemory
[1] "CD8A" "CD8B" "GZMA" "GZMK" "CCL5" "CXCR3"
$Thelper
[1] "CD40LG" "CD4" "IL7R" "RORA" "ANXA1"
$CD4_NaiveLike
[1] "CD40LG" "CD4" "CCR7" "SELL" "IL7R" "TCF7" "LEF1"
$CD8_Tpex
[1] "CD8A" "CD8B" "LAG3" "XCL1" "CRTAM" "TOX" "ZEB2" "PDCD1" "TCF7"
[10] "CCR7"
$CD8_Tex
[1] "CD8A" "CD8B" "LAG3" "HAVCR2" "GZMB" "PRF1" "PDCD1" "TIGIT"
$Treg
[1] "CD4" "IL2RA" "FOXP3"
signaturesHumanTILs[["cycling"]] <- c("TOP2A", "MKI67", "STMN1")
data.seurat.tcells <- AddModuleScore_UCell(data.seurat.tcells, features = signaturesHumanTILs,
ncores = 4)featnames <- paste0(names(signaturesHumanTILs), "_UCell")
FeaturePlot(data.seurat.tcells, features = featnames, pt.size = 0.1, order = T)VlnPlot(data.seurat.tcells, features = featnames, pt.size = 0, split.by = "seurat_clusters")Compare TIL subtype signature scores against original annotation
Now we can assess gene signature scores (their average values) in each T cell cluster as defined by Yost et al. An overall agreement between T cell subtypes defined by Yost and the corresponding gene signature scores can be observed.For example, CD8_eff and CD8_mem both with high CD8_EffectorMemory score, CD8_ex_act and CD_ex with highest CD8_Tex score, Treg with highest Treg score and Th17 with highest Thelper score. Instead, other Yost clusters seem to be more heterogeneous/ambiguously defined, e.g. CD8_act with both high CD8_EffectorMemory and CD4 Thelper scores, and Naive cluster with high Thelper signal.
library(dplyr)
TILsigScores <- data.seurat.tcells@meta.data[, featnames]
TILsigScores_vs_OriginalCluster <- TILsigScores %>%
filter(!is.na(data.seurat.tcells@meta.data$cluster.T)) %>%
group_by(data.seurat.tcells@meta.data$cluster.T[!is.na(data.seurat.tcells@meta.data$cluster.T)]) %>%
summarise_each(mean)
TILsigScores_vs_OriginalCluster.m <- as.matrix(TILsigScores_vs_OriginalCluster[,
-1])
rownames(TILsigScores_vs_OriginalCluster.m) <- TILsigScores_vs_OriginalCluster[,
1][[1]]
heatmap(t(TILsigScores_vs_OriginalCluster.m), cexCol = 0.7, scale = "none")For more in-depth analysis of T cell states we recommend projecting your data onto T cell reference atlases using ProjecTILs
Further reading
For more examples of UCell functionalities and reproducible R Notebook see UCell Demos
The code and the package are available at the UCell GitHub repository