The notebook is for meta-analysis of single-cell CITE-seq gene expression profiles of curated complement components generated from synovial biopsies from patients with RA.
library(dplyr)
library(kableExtra)
library(ggplot2)
library(ComplexHeatmap)
library(circlize)
library(grid)
library(RColorBrewer)
# Meta data
meta_data <- tibble::as_tibble(
readRDS("../Data/fine_cluster_all_314011cells_82samples_2023-03-12.rds")
)
# Normalized expression matrix
expr_matrix = readRDS("../Data/qc_mRNA_314011cells_log_normalized_matrix_2023-03-15.rds")
# Cell orders in meta data should be the same as expr matrix
meta_data <- meta_data[match(colnames(expr_matrix), meta_data$cell), , drop = FALSE]
str(meta_data)
## tibble [314,011 × 5] (S3: tbl_df/tbl/data.frame)
## $ sample : chr [1:314011] "BRI-399" "BRI-399" "BRI-399" "BRI-399" ...
## $ cell : chr [1:314011] "BRI-399_AAACGAACAGTCTGGC" "BRI-399_AAAGGATGTCTCAAGT" "BRI-399_AAAGTGACATCGAACT" "BRI-399_AAAGTGAGTGCACAAG" ...
## $ cluster_number: chr [1:314011] "B-2" "B-1" "B-2" "B-1" ...
## $ cluster_name : Factor w/ 77 levels "B-0: CD24+CD27+CD11b+ switched memory",..: 2 4 2 4 1 4 3 3 2 8 ...
## $ cell_type : chr [1:314011] "B cell" "B cell" "B cell" "B cell" ...
str(expr_matrix)
## Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## ..@ i : int [1:876405231] 53 66 74 93 132 154 190 193 201 251 ...
## ..@ p : int [1:314012] 0 1504 2834 3835 5144 6638 8431 9468 11027 12256 ...
## ..@ Dim : int [1:2] 33538 314011
## ..@ Dimnames:List of 2
## .. ..$ : chr [1:33538] "MIR1302-2HG" "FAM138A" "OR4F5" "AL627309.1" ...
## .. ..$ : chr [1:314011] "BRI-399_AAACGAACAGTCTGGC" "BRI-399_AAAGGATGTCTCAAGT" "BRI-399_AAAGTGACATCGAACT" "BRI-399_AAAGTGAGTGCACAAG" ...
## ..@ x : num [1:876405231] 1.19 1.72 1.19 1.19 1.19 ...
## ..@ factors : list()
meta_data[1:5,] %>%
DT::datatable(options = list(scrollx = T))
complement_annotation <- list(
Classical = c("C1QA", "C1QB", "C1QC", "C1R", "C1S","C1QBP"),
Lectin = c("MASP1", "MASP2", "FCN1", "FCN2", "FCN3", "COLEC11", "COLEC12"),
Classical_Lectin_Shared = c("C2"),
Alternative = c("CFB", "CFD", "CFP"),
Common = c("C3"),
Terminal = c("C5", "C6", "C7", "C9"),
Regulators = c("CD55", "CD46", "CD59", "SERPING1", "CLU","CFH"),
Receptors = c("CR1", "CR2", "C3AR1", "C5AR1", "C5AR2"),
Fc_Receptors = c("FCGR1A", "FCGR1B", "FCGR2A", "FCGR3A"),
FHR = c("CFHR1", "CFHR2", "CFHR3", "CFHR4", "CFHR5")
)
gene_check_by_pathway <- do.call(rbind, lapply(names(complement_annotation), function(pathway) {
genes <- complement_annotation[[pathway]]
data.frame(
pathway = pathway,
gene = genes,
present = genes %in% rownames(expr_matrix)
)
}))
# Genes present in expr_matrix, keeping the pathway order
query_gene <- gene_check_by_pathway$gene[gene_check_by_pathway$present]
query_gene
## [1] "C1QA" "C1QB" "C1QC" "C1R" "C1S" "C1QBP"
## [7] "MASP1" "MASP2" "FCN1" "FCN2" "FCN3" "COLEC11"
## [13] "COLEC12" "C2" "CFB" "CFD" "CFP" "C3"
## [19] "C5" "C6" "C7" "C9" "CD55" "CD46"
## [25] "CD59" "SERPING1" "CLU" "CFH" "CR1" "CR2"
## [31] "C3AR1" "C5AR1" "C5AR2" "FCGR1A" "FCGR1B" "FCGR2A"
## [37] "FCGR3A" "CFHR1" "CFHR2" "CFHR3" "CFHR4" "CFHR5"
# Create the expression matrix with the genes of interest
matrix_heat <- expr_matrix[query_gene, , drop = FALSE] %>%
Matrix::t() %>% # cells x genes
as.matrix() %>%
as.data.frame() %>%
mutate(cluster_name = as.character(meta_data$cluster_name))
dim(matrix_heat)
## [1] 314011 43
gene_annotation <- gene_check_by_pathway[gene_check_by_pathway$present, c("gene", "pathway")]
# Average of expression across cells in each cluster
exp_avg = aggregate(matrix_heat[,1:(ncol(matrix_heat)-1)], list(matrix_heat$cluster_name), mean)
dim(exp_avg)
## [1] 77 43
colnames(exp_avg)[1] <- "cluster_name"
exp_avg <- as.data.frame(t(exp_avg)) # gene x cell_type
colnames(exp_avg) <- as.character(t(exp_avg[1,]))
exp_avg <- exp_avg[-1,]
row_names <- rownames(exp_avg)
exp_avg <- mutate_all(exp_avg, function(x) as.numeric(as.character(x)))
rownames(exp_avg) <- row_names
# Visualize 15 complement genes x cell tyes
exp_avg[1:15,] %>%
as.data.frame() %>%
DT::datatable(options = list(scrollx = T))
exp_avg = as.matrix(exp_avg)
# Z-score
scale_rows <- function(x) t(scale(t(x)))
exp_ave_scale <- scale_rows(exp_avg)
# Scale to (-2,2)
exp_ave_scale[exp_ave_scale > 2] <- 2
exp_ave_scale[exp_ave_scale < -2] <- -2
exp_ave_scale <- na.omit(exp_ave_scale)
dim(exp_ave_scale)
## [1] 41 77
# First 15 rows of the z score matrix
exp_ave_scale[1:15,] %>%
as.data.frame() %>%
DT::datatable()
# Cell state annotations
cellstates <- colnames(exp_ave_scale)
cell_type <- ifelse(grepl("^M-", cellstates), "Myeloid",
ifelse(grepl("^(F-|Mu)", cellstates), "Fibroblast",
ifelse(grepl("^E-", cellstates), "Endothelial",
ifelse(grepl("^T-", cellstates), "T cell",
ifelse(grepl("^NK-", cellstates), "NK cell",
ifelse(grepl("^B-", cellstates), "B/plasma cell",
NA))))))
annotation_col <- data.frame(
`Cell type` = cell_type,
check.names = FALSE
)
rownames(annotation_col) <- cellstates
# row annotation: complement gene group
gene_group_df <- do.call(rbind, lapply(names(complement_annotation), function(grp) {
data.frame(
gene = complement_annotation[[grp]],
group = grp
)
}))
gene_group <- gene_group_df$group[match(rownames(exp_ave_scale), gene_group_df$gene)]
gene_group_rename <- c(
Alternative = "Alternative pathway-specific",
Classical = "Classical pathway-specfic",
Classical_Lectin_Shared = "Classical/Lectin pathway",
Common = "Common pathway-shared across Classical/Lectin/Alternative",
Fc_Receptors = "Fc receptors",
FHR = "Factor H-Related Proteins",
Lectin = "Lectin pathway-specific",
Receptors = "Complement receptors",
Regulators = "Complement regulators",
Terminal = "Terminal pathway"
)
gene_group_new <- gene_group_rename[as.character(gene_group)]
annotation_row <- data.frame(
`Complement pathway` = gene_group_new,
check.names = FALSE
)
rownames(annotation_row) <- rownames(exp_ave_scale)
#colors
meta_colors <- list(
`Cell type` = c(
Myeloid = "#F8766D",
Fibroblast = "#A6CEE3",
Endothelial = "#1F78B4",
`T cell` = "#CBB3D8",
`NK cell` = "#89718F",
`B/plasma cell` = "#B15928"
),
`Complement pathway` = c(
Classical = "#80B1D3",
Lectin = "#BC80BD",
Classical_Lectin_Shared = "#ffed6f",
Alternative = "#FB8072",
Common = "#d9d9d9",
Terminal = "#FDB462",
Regulators = "#B3DE69",
Receptors = "#FCCDE5",
#Non_Complement_Factors = "#D9D9D9",
Fc_Receptors = "#CCEBC5",
FHR = "#8dd3c7"
)
)
meta_colors$`Complement pathway` <- meta_colors$`Complement pathway`[names(gene_group_rename)]
names(meta_colors$`Complement pathway`) <- unname(gene_group_rename)
# row clustering
row_hclust <- hclust(dist(exp_ave_scale))
# top annotations
col_prop <- table(meta_data$cluster_name) / nrow(meta_data)
col_prop <- as.numeric(col_prop[colnames(exp_ave_scale)])
names(col_prop) <- colnames(exp_ave_scale)
stopifnot(!any(is.na(col_prop)))
# proportion expressing cells = expr > 0
row_prop <- Matrix::rowMeans(expr_matrix[rownames(exp_ave_scale), , drop = FALSE] > 0)
row_prop <- as.numeric(row_prop[rownames(exp_ave_scale)])
names(row_prop) <- rownames(exp_ave_scale)
stopifnot(!any(is.na(row_prop)))
top_ha <- HeatmapAnnotation(
`Proportion\ncell state\nover all cells` = anno_barplot(
col_prop,
gp = gpar(fill = "grey60", col = "black"),
border = FALSE,
height = unit(1.0, "cm"),
ylim = c(0, max(col_prop, na.rm = TRUE)),
axis_param = list(
side = "left",
gp = gpar(fontsize = 8)
)
),
`Cell type` = annotation_col$`Cell type`,
col = meta_colors,
show_annotation_name = TRUE,
annotation_name_gp = gpar(fontsize = 9, fontface = "bold")
)
# right annotation
right_ha <- rowAnnotation(
`Proportion\nexpressing\ncells` = anno_barplot(
row_prop,
gp = gpar(fill = "grey60", col = "black"),
border = FALSE,
width = unit(1.0, "cm"),
ylim = c(0, max(row_prop, na.rm = TRUE)),
axis_param = list(
side = "bottom",
gp = gpar(fontsize = 8)
)
),
annotation_name_side = "bottom",
annotation_name_gp = gpar(fontsize = 9)
)
# left annoation
left_ha <- rowAnnotation(
df = annotation_row,
col = meta_colors,
show_annotation_name = FALSE
)
# Heatmap
col_fun <- circlize::colorRamp2(
breaks = c(-2, 0, 2),
hcl_palette = "PuOr",
reverse = TRUE
)
Heatmap(
exp_ave_scale,
name = "Z-score",
col = col_fun,
cluster_rows = as.dendrogram(row_hclust),
row_split = 5,
cluster_row_slices = FALSE,
cluster_columns = TRUE,
column_split = ,
cluster_column_slices = FALSE,
row_title = NULL,
column_title = NULL,
top_annotation = top_ha,
left_annotation = left_ha,
right_annotation = right_ha,
show_row_names = TRUE,
show_column_names = TRUE,
row_names_side = "right",
row_names_gp = gpar(fontsize = 11, fontface = "italic"),
column_names_gp = gpar(fontsize = 12),
column_names_rot = -45,
show_row_dend = FALSE,
show_column_dend = FALSE,
rect_gp = gpar(col = "grey", lwd = 0.8),
heatmap_legend_param = list(
title = "Z-score"
)
)