Evaluating human TIL subtype signatures using UCell
In this vignette we show how to use UCell gene signature scoring to identify and filter tumor-infiltrating T cell subtypes
Prepare environment
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
::install("UCell")
BiocManager
library(Seurat)
library(UCell)
library(dplyr)
set.seed(123)
Query single-cell data
Use data from Yost et al. Nat Med 2019 Pre-compiled R object available here
<- "Yost.pretreatment.all.rds"
inputFile if (!file.exists(inputFile)) {
download.file("https://drive.switch.ch/index.php/s/cluBLHkFFzLZWzL/download",
inputFile)
}<- readRDS(inputFile)
data.seurat <- data.seurat %>%
data.seurat %>%
NormalizeData %>%
FindVariableFeatures %>%
ScaleData %>%
RunPCA RunUMAP(dims = 1:30)
data.seurat
An object of class Seurat
20787 features across 21328 samples within 1 assay
Active assay: RNA (20787 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
Colour by patient and original cluster annotations
DimPlot(data.seurat, group.by = "patient")
DimPlot(data.seurat, group.by = "cluster")
DimPlot(data.seurat, group.by = "cluster.T")
Unsupervised clustering
set.seed(123)
<- "pca"
which.red = 0.7
resol = 30
ndim
<- FindNeighbors(data.seurat, reduction = which.red, dims = 1:ndim)
data.seurat <- FindClusters(data.seurat, resolution = resol) data.seurat
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 21328
Number of edges: 776031
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9116
Number of communities: 26
Elapsed time: 3 seconds
DimPlot(data.seurat, reduction = "umap", group.by = "seurat_clusters", label = T) +
NoLegend()
Score general cell type signatures using UCell
Apply UCell with human cell type signatures to identify major cell types These signatures were extracted from Han et al Nature 2020 and further filtered
<- readRDS("aux/signaturesHumanCellTypes.rds")
signaturesHumanCellTypes
head(signaturesHumanCellTypes)
$Fetal.epithelial.progenitor
1 2 3 4 5 6 7 8 9 10
"BEX3" "STMN1" "SOX4" "LDHB" "SKP1" "SNRPE" "ID3" "SRP9" "GSTP1" "SRP14"
$Macrophage
1 2 3 4 5 6 7 8
"CTSB" "C1QB" "LAPTM5" "TYROBP" "PSAP" "C1QA" "HLA-DRA" "CTSD"
9 10
"NPC2" "FCER1G"
$B.cell..Plasmocyte.
1 2 3 4 5 6 7 8
"JCHAIN" "IGHA1" "SSR4" "MZB1" "IGKC" "IGHA2" "HERPUD1" "DERL3"
9 10
"SEC11C" "FKBP11"
$Fibroblast
1 2 3 4 5 6 7 8
"C1S" "TIMP2" "COL6A3" "SEMA3C" "MMP2" "GSN" "IGFBP6" "MFAP4"
9 10
"COL6A1" "PLAC9"
$Fasciculata.cell
1 2 3 4 5 6 7 8
"PEBP1" "STAR" "RARRES2" "CLU" "CYP21A2" "CYP17A1" "AKR1B1" "NOV"
9 10
"TPD52L1" "EPHX1"
$T.cell
1 2 3 4 5 6 7 8 9
"CD3D" "CD3E" "CD3G" "CD4" "CD2" "CD7" "TRAC" "TRBC1" "LAT"
<- AddModuleScore_UCell(data.seurat, features = signaturesHumanCellTypes,
data.seurat ncores = 4)
# Some major cell types to look at:
<- c("Macrophage", "Fibroblast", "T.cell", "Stromal.cell", "B.cell", "Myeloid.cell",
toplot "Endothelial.cell.1", "NK")
<- paste0(toplot, "_UCell")
featnames FeaturePlot(data.seurat, features = featnames, pt.size = 0.1, max.cutoff = "q99",
ncol = 2)
VlnPlot(data.seurat, features = featnames, pt.size = 0, split.by = "seurat_clusters",
ncol = 2)
We can appreciate that some clusters have a clearly high mean distribution of T cell signature scores (T.cell_UCell score > 0.2). We can use this value as a rudimentary threshold to split T cell from non-T cell clusters.
Identify T cells based on signatures and subset them
Identify T cell clusters by UCell score
# select as Tcell clusters only those with median Uscore>0.2
<- sapply(levels(data.seurat$seurat_clusters), function(x) {
medians median(data.seurat@meta.data[data.seurat$seurat_clusters == x, "T.cell_UCell"])
})<- names(medians[medians > 0.2])
tcell.clusters
# Add metadata
$is.Tcell <- FALSE
data.seurat@meta.data[data.seurat$seurat_clusters %in% tcell.clusters, "is.Tcell"] <- TRUE
data.seuratDimPlot(data.seurat, group.by = "is.Tcell")
Subset on T cells
<- subset(data.seurat, subset = is.Tcell == TRUE)
data.seurat.tcells data.seurat.tcells
An object of class Seurat
20787 features across 14246 samples within 1 assay
Active assay: RNA (20787 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
Recalculate embeddings only for filtered T cells
= 20
ndim = 3
resol <- data.seurat.tcells %>%
data.seurat.tcells %>%
NormalizeData %>%
FindVariableFeatures %>%
ScaleData %>%
RunPCA RunUMAP(dims = 1:ndim)
By patient and by annotation from original study
DimPlot(data.seurat.tcells, group.by = "patient")
DimPlot(data.seurat.tcells, group.by = "cluster.T")
DimPlot(data.seurat.tcells, group.by = "seurat_clusters")
Score TIL subtype-specific signatures using UCell
Now we can apply UCell using signatures specific for distict T cell subtypes. TIL states signatures were obtained from ProjecTILs’ reference TIL atlas (https://github.com/carmonalab/ProjecTILs), Andreatta et al Nature Communications (2021)
<- readRDS("aux/signaturesHumanTILs.rds")
signaturesHumanTILs signaturesHumanTILs
$Tfh
[1] "CD4" "CD40LG" "TOX2" "MAF" "CD200" "BATF"
$CD8_NaiveLike
[1] "CD8A" "CD8B" "CCR7" "IL7R" "SELL" "TCF7" "S1PR1" "LEF1"
$CD8_EffectorMemory
[1] "CD8A" "CD8B" "GZMA" "GZMK" "CCL5" "CXCR3"
$Thelper
[1] "CD40LG" "CD4" "IL7R" "RORA" "ANXA1"
$CD4_NaiveLike
[1] "CD40LG" "CD4" "CCR7" "SELL" "IL7R" "TCF7" "LEF1"
$CD8_Tpex
[1] "CD8A" "CD8B" "LAG3" "XCL1" "CRTAM" "TOX" "ZEB2" "PDCD1" "TCF7"
[10] "CCR7"
$CD8_Tex
[1] "CD8A" "CD8B" "LAG3" "HAVCR2" "GZMB" "PRF1" "PDCD1" "TIGIT"
$Treg
[1] "CD4" "IL2RA" "FOXP3"
"cycling"]] <- c("TOP2A", "MKI67", "STMN1")
signaturesHumanTILs[[
<- AddModuleScore_UCell(data.seurat.tcells, features = signaturesHumanTILs,
data.seurat.tcells ncores = 4)
<- paste0(names(signaturesHumanTILs), "_UCell")
featnames FeaturePlot(data.seurat.tcells, features = featnames, pt.size = 0.1, order = T)
VlnPlot(data.seurat.tcells, features = featnames, pt.size = 0, split.by = "seurat_clusters")
Compare TIL subtype signature scores against original annotation
Now we can assess gene signature scores (their average values) in each T cell cluster as defined by Yost et al. An overall agreement between T cell subtypes defined by Yost and the corresponding gene signature scores can be observed.For example, CD8_eff and CD8_mem both with high CD8_EffectorMemory score, CD8_ex_act and CD_ex with highest CD8_Tex score, Treg with highest Treg score and Th17 with highest Thelper score. Instead, other Yost clusters seem to be more heterogeneous/ambiguously defined, e.g. CD8_act with both high CD8_EffectorMemory and CD4 Thelper scores, and Naive cluster with high Thelper signal.
library(dplyr)
<- data.seurat.tcells@meta.data[, featnames]
TILsigScores <- TILsigScores %>%
TILsigScores_vs_OriginalCluster filter(!is.na(data.seurat.tcells@meta.data$cluster.T)) %>%
group_by(data.seurat.tcells@meta.data$cluster.T[!is.na(data.seurat.tcells@meta.data$cluster.T)]) %>%
summarise_each(mean)
<- as.matrix(TILsigScores_vs_OriginalCluster[,
TILsigScores_vs_OriginalCluster.m -1])
rownames(TILsigScores_vs_OriginalCluster.m) <- TILsigScores_vs_OriginalCluster[,
1][[1]]
heatmap(t(TILsigScores_vs_OriginalCluster.m), cexCol = 0.7, scale = "none")
For more in-depth analysis of T cell states we recommend projecting your data onto T cell reference atlases using ProjecTILs
Further reading
For more examples of UCell functionalities and reproducible R Notebook see UCell Demos
The code and the package are available at the UCell GitHub repository