Evaluating human TIL subtype signatures using UCell

In this vignette we show how to use UCell gene signature scoring to identify and filter tumor-infiltrating T cell subtypes

Prepare environment

if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("UCell")

library(Seurat)
library(UCell)
library(dplyr)
set.seed(123)

Query single-cell data

Use data from Yost et al. Nat Med 2019 Pre-compiled R object available here

inputFile <- "Yost.pretreatment.all.rds"
if (!file.exists(inputFile)) {
    download.file("https://drive.switch.ch/index.php/s/cluBLHkFFzLZWzL/download",
        inputFile)
}
data.seurat <- readRDS(inputFile)
data.seurat <- data.seurat %>%
    NormalizeData %>%
    FindVariableFeatures %>%
    ScaleData %>%
    RunPCA %>%
    RunUMAP(dims = 1:30)
data.seurat

An object of class Seurat 
20787 features across 21328 samples within 1 assay 
Active assay: RNA (20787 features, 2000 variable features)
 2 dimensional reductions calculated: pca, umap

Colour by patient and original cluster annotations

DimPlot(data.seurat, group.by = "patient")

DimPlot(data.seurat, group.by = "cluster")

DimPlot(data.seurat, group.by = "cluster.T")

Unsupervised clustering

set.seed(123)

which.red <- "pca"
resol = 0.7
ndim = 30

data.seurat <- FindNeighbors(data.seurat, reduction = which.red, dims = 1:ndim)
data.seurat <- FindClusters(data.seurat, resolution = resol)

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 21328
Number of edges: 776031

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9116
Number of communities: 26
Elapsed time: 3 seconds

DimPlot(data.seurat, reduction = "umap", group.by = "seurat_clusters", label = T) +
    NoLegend()

Score general cell type signatures using UCell

Apply UCell with human cell type signatures to identify major cell types These signatures were extracted from Han et al Nature 2020 and further filtered

signaturesHumanCellTypes <- readRDS("aux/signaturesHumanCellTypes.rds")

head(signaturesHumanCellTypes)

$Fetal.epithelial.progenitor
      1       2       3       4       5       6       7       8       9      10 
 "BEX3" "STMN1"  "SOX4"  "LDHB"  "SKP1" "SNRPE"   "ID3"  "SRP9" "GSTP1" "SRP14" 

$Macrophage
        1         2         3         4         5         6         7         8 
   "CTSB"    "C1QB"  "LAPTM5"  "TYROBP"    "PSAP"    "C1QA" "HLA-DRA"    "CTSD" 
        9        10 
   "NPC2"  "FCER1G" 

$B.cell..Plasmocyte.
        1         2         3         4         5         6         7         8 
 "JCHAIN"   "IGHA1"    "SSR4"    "MZB1"    "IGKC"   "IGHA2" "HERPUD1"   "DERL3" 
        9        10 
 "SEC11C"  "FKBP11" 

$Fibroblast
       1        2        3        4        5        6        7        8 
   "C1S"  "TIMP2" "COL6A3" "SEMA3C"   "MMP2"    "GSN" "IGFBP6"  "MFAP4" 
       9       10 
"COL6A1"  "PLAC9" 

$Fasciculata.cell
        1         2         3         4         5         6         7         8 
  "PEBP1"    "STAR" "RARRES2"     "CLU" "CYP21A2" "CYP17A1"  "AKR1B1"     "NOV" 
        9        10 
"TPD52L1"   "EPHX1" 

$T.cell
      1       2       3       4       5       6       7       8       9 
 "CD3D"  "CD3E"  "CD3G"   "CD4"   "CD2"   "CD7"  "TRAC" "TRBC1"   "LAT"

data.seurat <- AddModuleScore_UCell(data.seurat, features = signaturesHumanCellTypes,
    ncores = 4)

# Some major cell types to look at:
toplot <- c("Macrophage", "Fibroblast", "T.cell", "Stromal.cell", "B.cell", "Myeloid.cell",
    "Endothelial.cell.1", "NK")

featnames <- paste0(toplot, "_UCell")
FeaturePlot(data.seurat, features = featnames, pt.size = 0.1, max.cutoff = "q99",
    ncol = 2)

VlnPlot(data.seurat, features = featnames, pt.size = 0, split.by = "seurat_clusters",
    ncol = 2)

We can appreciate that some clusters have a clearly high mean distribution of T cell signature scores (T.cell_UCell score > 0.2). We can use this value as a rudimentary threshold to split T cell from non-T cell clusters.

Identify T cells based on signatures and subset them

Identify T cell clusters by UCell score

# select as Tcell clusters only those with median Uscore>0.2
medians <- sapply(levels(data.seurat$seurat_clusters), function(x) {
    median(data.seurat@meta.data[data.seurat$seurat_clusters == x, "T.cell_UCell"])
})
tcell.clusters <- names(medians[medians > 0.2])

# Add metadata
data.seurat$is.Tcell <- FALSE
data.seurat@meta.data[data.seurat$seurat_clusters %in% tcell.clusters, "is.Tcell"] <- TRUE
DimPlot(data.seurat, group.by = "is.Tcell")

Subset on T cells

data.seurat.tcells <- subset(data.seurat, subset = is.Tcell == TRUE)
data.seurat.tcells

An object of class Seurat 
20787 features across 14246 samples within 1 assay 
Active assay: RNA (20787 features, 2000 variable features)
 2 dimensional reductions calculated: pca, umap

Recalculate embeddings only for filtered T cells

ndim = 20
resol = 3
data.seurat.tcells <- data.seurat.tcells %>%
    NormalizeData %>%
    FindVariableFeatures %>%
    ScaleData %>%
    RunPCA %>%
    RunUMAP(dims = 1:ndim)

By patient and by annotation from original study

DimPlot(data.seurat.tcells, group.by = "patient")

DimPlot(data.seurat.tcells, group.by = "cluster.T")

DimPlot(data.seurat.tcells, group.by = "seurat_clusters")

Score TIL subtype-specific signatures using UCell

Now we can apply UCell using signatures specific for distict T cell subtypes. TIL states signatures were obtained from ProjecTILs’ reference TIL atlas (https://github.com/carmonalab/ProjecTILs), Andreatta et al Nature Communications (2021)

signaturesHumanTILs <- readRDS("aux/signaturesHumanTILs.rds")
signaturesHumanTILs

$Tfh
[1] "CD4"    "CD40LG" "TOX2"   "MAF"    "CD200"  "BATF"  

$CD8_NaiveLike
[1] "CD8A"  "CD8B"  "CCR7"  "IL7R"  "SELL"  "TCF7"  "S1PR1" "LEF1" 

$CD8_EffectorMemory
[1] "CD8A"  "CD8B"  "GZMA"  "GZMK"  "CCL5"  "CXCR3"

$Thelper
[1] "CD40LG" "CD4"    "IL7R"   "RORA"   "ANXA1" 

$CD4_NaiveLike
[1] "CD40LG" "CD4"    "CCR7"   "SELL"   "IL7R"   "TCF7"   "LEF1"  

$CD8_Tpex
 [1] "CD8A"  "CD8B"  "LAG3"  "XCL1"  "CRTAM" "TOX"   "ZEB2"  "PDCD1" "TCF7" 
[10] "CCR7" 

$CD8_Tex
[1] "CD8A"   "CD8B"   "LAG3"   "HAVCR2" "GZMB"   "PRF1"   "PDCD1"  "TIGIT" 

$Treg
[1] "CD4"   "IL2RA" "FOXP3"

signaturesHumanTILs[["cycling"]] <- c("TOP2A", "MKI67", "STMN1")

data.seurat.tcells <- AddModuleScore_UCell(data.seurat.tcells, features = signaturesHumanTILs,
    ncores = 4)

featnames <- paste0(names(signaturesHumanTILs), "_UCell")
FeaturePlot(data.seurat.tcells, features = featnames, pt.size = 0.1, order = T)

VlnPlot(data.seurat.tcells, features = featnames, pt.size = 0, split.by = "seurat_clusters")

Compare TIL subtype signature scores against original annotation

Now we can assess gene signature scores (their average values) in each T cell cluster as defined by Yost et al. An overall agreement between T cell subtypes defined by Yost and the corresponding gene signature scores can be observed.For example, CD8_eff and CD8_mem both with high CD8_EffectorMemory score, CD8_ex_act and CD_ex with highest CD8_Tex score, Treg with highest Treg score and Th17 with highest Thelper score. Instead, other Yost clusters seem to be more heterogeneous/ambiguously defined, e.g. CD8_act with both high CD8_EffectorMemory and CD4 Thelper scores, and Naive cluster with high Thelper signal.

library(dplyr)
TILsigScores <- data.seurat.tcells@meta.data[, featnames]
TILsigScores_vs_OriginalCluster <- TILsigScores %>%
    filter(!is.na(data.seurat.tcells@meta.data$cluster.T)) %>%
    group_by(data.seurat.tcells@meta.data$cluster.T[!is.na(data.seurat.tcells@meta.data$cluster.T)]) %>%
    summarise_each(mean)
TILsigScores_vs_OriginalCluster.m <- as.matrix(TILsigScores_vs_OriginalCluster[,
    -1])
rownames(TILsigScores_vs_OriginalCluster.m) <- TILsigScores_vs_OriginalCluster[,
    1][[1]]
heatmap(t(TILsigScores_vs_OriginalCluster.m), cexCol = 0.7, scale = "none")

For more in-depth analysis of T cell states we recommend projecting your data onto T cell reference atlases using ProjecTILs

Evaluating human TIL subtype signatures using UCell