1 Introduction

The notebook is for meta-analysis of single-cell CITE-seq gene expression profiles of curated complement components generated from synovial biopsies from patients with RA.

2 Load packages

library(dplyr)
library(kableExtra)
library(ggplot2)
library(ComplexHeatmap)
library(circlize)
library(grid)
library(RColorBrewer)

3 Load data

# Meta data
meta_data <- tibble::as_tibble(
  readRDS("../Data/fine_cluster_all_314011cells_82samples_2023-03-12.rds")
)
# Normalized expression matrix
expr_matrix = readRDS("../Data/qc_mRNA_314011cells_log_normalized_matrix_2023-03-15.rds")

# Cell orders in meta data should be the same as expr matrix
meta_data <- meta_data[match(colnames(expr_matrix), meta_data$cell), , drop = FALSE]
str(meta_data)
## tibble [314,011 × 5] (S3: tbl_df/tbl/data.frame)
##  $ sample        : chr [1:314011] "BRI-399" "BRI-399" "BRI-399" "BRI-399" ...
##  $ cell          : chr [1:314011] "BRI-399_AAACGAACAGTCTGGC" "BRI-399_AAAGGATGTCTCAAGT" "BRI-399_AAAGTGACATCGAACT" "BRI-399_AAAGTGAGTGCACAAG" ...
##  $ cluster_number: chr [1:314011] "B-2" "B-1" "B-2" "B-1" ...
##  $ cluster_name  : Factor w/ 77 levels "B-0: CD24+CD27+CD11b+ switched memory",..: 2 4 2 4 1 4 3 3 2 8 ...
##  $ cell_type     : chr [1:314011] "B cell" "B cell" "B cell" "B cell" ...
str(expr_matrix)
## Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##   ..@ i       : int [1:876405231] 53 66 74 93 132 154 190 193 201 251 ...
##   ..@ p       : int [1:314012] 0 1504 2834 3835 5144 6638 8431 9468 11027 12256 ...
##   ..@ Dim     : int [1:2] 33538 314011
##   ..@ Dimnames:List of 2
##   .. ..$ : chr [1:33538] "MIR1302-2HG" "FAM138A" "OR4F5" "AL627309.1" ...
##   .. ..$ : chr [1:314011] "BRI-399_AAACGAACAGTCTGGC" "BRI-399_AAAGGATGTCTCAAGT" "BRI-399_AAAGTGACATCGAACT" "BRI-399_AAAGTGAGTGCACAAG" ...
##   ..@ x       : num [1:876405231] 1.19 1.72 1.19 1.19 1.19 ...
##   ..@ factors : list()
meta_data[1:5,] %>% 
  DT::datatable(options = list(scrollx = T))

4 Curated complement components

complement_annotation <- list(
  Classical = c("C1QA", "C1QB", "C1QC", "C1R", "C1S","C1QBP"),
  Lectin = c("MASP1", "MASP2", "FCN1", "FCN2", "FCN3", "COLEC11", "COLEC12"),
  Classical_Lectin_Shared = c("C2"),
  Alternative = c("CFB", "CFD", "CFP"),
  Common = c("C3"),
  Terminal = c("C5", "C6", "C7", "C9"),
  Regulators = c("CD55", "CD46", "CD59", "SERPING1", "CLU","CFH"),
  Receptors = c("CR1", "CR2", "C3AR1", "C5AR1", "C5AR2"),
  Fc_Receptors = c("FCGR1A", "FCGR1B", "FCGR2A", "FCGR3A"),
  FHR = c("CFHR1", "CFHR2", "CFHR3", "CFHR4", "CFHR5")
)

gene_check_by_pathway <- do.call(rbind, lapply(names(complement_annotation), function(pathway) {
  genes <- complement_annotation[[pathway]]
  
  data.frame(
    pathway = pathway,
    gene = genes,
    present = genes %in% rownames(expr_matrix)
  )
}))
# Genes present in expr_matrix, keeping the pathway order
query_gene <- gene_check_by_pathway$gene[gene_check_by_pathway$present]

query_gene
##  [1] "C1QA"     "C1QB"     "C1QC"     "C1R"      "C1S"      "C1QBP"   
##  [7] "MASP1"    "MASP2"    "FCN1"     "FCN2"     "FCN3"     "COLEC11" 
## [13] "COLEC12"  "C2"       "CFB"      "CFD"      "CFP"      "C3"      
## [19] "C5"       "C6"       "C7"       "C9"       "CD55"     "CD46"    
## [25] "CD59"     "SERPING1" "CLU"      "CFH"      "CR1"      "CR2"     
## [31] "C3AR1"    "C5AR1"    "C5AR2"    "FCGR1A"   "FCGR1B"   "FCGR2A"  
## [37] "FCGR3A"   "CFHR1"    "CFHR2"    "CFHR3"    "CFHR4"    "CFHR5"
# Create the expression matrix with the genes of interest
matrix_heat <- expr_matrix[query_gene, , drop = FALSE] %>%
  Matrix::t() %>%  # cells x genes
  as.matrix() %>%
  as.data.frame() %>%
  mutate(cluster_name = as.character(meta_data$cluster_name))

dim(matrix_heat)
## [1] 314011     43
gene_annotation <- gene_check_by_pathway[gene_check_by_pathway$present, c("gene", "pathway")]

5 Meta-analysis

# Average of expression across cells in each cluster
exp_avg = aggregate(matrix_heat[,1:(ncol(matrix_heat)-1)], list(matrix_heat$cluster_name), mean)

dim(exp_avg)
## [1] 77 43
colnames(exp_avg)[1] <- "cluster_name"
exp_avg <- as.data.frame(t(exp_avg))   # gene x cell_type
colnames(exp_avg) <- as.character(t(exp_avg[1,]))
exp_avg <- exp_avg[-1,]  

row_names <- rownames(exp_avg)
exp_avg <- mutate_all(exp_avg, function(x) as.numeric(as.character(x)))
rownames(exp_avg) <- row_names 

# Visualize 15 complement genes x cell tyes
exp_avg[1:15,] %>% 
  as.data.frame() %>% 
   DT::datatable(options = list(scrollx = T))
exp_avg = as.matrix(exp_avg)
# Z-score 
scale_rows <- function(x) t(scale(t(x)))
exp_ave_scale <- scale_rows(exp_avg)

# Scale to (-2,2)
exp_ave_scale[exp_ave_scale > 2] <- 2 
exp_ave_scale[exp_ave_scale < -2] <- -2

exp_ave_scale <- na.omit(exp_ave_scale) 
dim(exp_ave_scale)
## [1] 41 77
# First 15 rows of the z score matrix
exp_ave_scale[1:15,] %>% 
  as.data.frame() %>%
    DT::datatable()
# Cell state annotations
cellstates <- colnames(exp_ave_scale)

cell_type <- ifelse(grepl("^M-", cellstates), "Myeloid",
             ifelse(grepl("^(F-|Mu)", cellstates), "Fibroblast",
             ifelse(grepl("^E-", cellstates), "Endothelial",
             ifelse(grepl("^T-", cellstates), "T cell",
             ifelse(grepl("^NK-", cellstates), "NK cell",
             ifelse(grepl("^B-", cellstates), "B/plasma cell",
                    NA))))))

annotation_col <- data.frame(
  `Cell type` = cell_type,
  check.names = FALSE
)

rownames(annotation_col) <- cellstates
# row annotation: complement gene group
gene_group_df <- do.call(rbind, lapply(names(complement_annotation), function(grp) {
  data.frame(
    gene = complement_annotation[[grp]],
    group = grp
  )
}))

gene_group <- gene_group_df$group[match(rownames(exp_ave_scale), gene_group_df$gene)]

gene_group_rename <- c(
  Alternative = "Alternative pathway-specific",
  Classical = "Classical pathway-specfic",
  Classical_Lectin_Shared = "Classical/Lectin pathway",
  Common = "Common pathway-shared across Classical/Lectin/Alternative",
  Fc_Receptors = "Fc receptors",
  FHR = "Factor H-Related Proteins",
  Lectin = "Lectin pathway-specific",
  Receptors = "Complement receptors",
  Regulators = "Complement regulators",
  Terminal = "Terminal pathway"
)

gene_group_new <- gene_group_rename[as.character(gene_group)]

annotation_row <- data.frame(
  `Complement pathway` = gene_group_new,
  check.names = FALSE
)

rownames(annotation_row) <- rownames(exp_ave_scale)
#colors
meta_colors <- list(
  `Cell type` = c(
    Myeloid = "#F8766D",
    Fibroblast = "#A6CEE3",
    Endothelial = "#1F78B4",
    `T cell` = "#CBB3D8",
    `NK cell` = "#89718F",
    `B/plasma cell` = "#B15928"
  ),
  `Complement pathway` = c(
    Classical = "#80B1D3",
    Lectin = "#BC80BD",
    Classical_Lectin_Shared = "#ffed6f",
    Alternative = "#FB8072",
    Common = "#d9d9d9",
    Terminal = "#FDB462",
    Regulators = "#B3DE69",
    Receptors = "#FCCDE5",
    #Non_Complement_Factors = "#D9D9D9",
    Fc_Receptors = "#CCEBC5",
    FHR = "#8dd3c7"
  )
)

meta_colors$`Complement pathway` <- meta_colors$`Complement pathway`[names(gene_group_rename)]
names(meta_colors$`Complement pathway`) <- unname(gene_group_rename)
# row clustering
row_hclust <- hclust(dist(exp_ave_scale))

# top annotations
col_prop <- table(meta_data$cluster_name) / nrow(meta_data)
col_prop <- as.numeric(col_prop[colnames(exp_ave_scale)])
names(col_prop) <- colnames(exp_ave_scale)
stopifnot(!any(is.na(col_prop)))

# proportion expressing cells = expr > 0 
row_prop <- Matrix::rowMeans(expr_matrix[rownames(exp_ave_scale), , drop = FALSE] > 0)
row_prop <- as.numeric(row_prop[rownames(exp_ave_scale)])
names(row_prop) <- rownames(exp_ave_scale)
stopifnot(!any(is.na(row_prop)))

top_ha <- HeatmapAnnotation(
  `Proportion\ncell state\nover all cells` = anno_barplot(
    col_prop,
    gp = gpar(fill = "grey60", col = "black"),
    border = FALSE,
    height = unit(1.0, "cm"),
    ylim = c(0, max(col_prop, na.rm = TRUE)),
    axis_param = list(
      side = "left",
      gp = gpar(fontsize = 8)
    )
  ),
  `Cell type` = annotation_col$`Cell type`,
  col = meta_colors,
  show_annotation_name = TRUE,
  annotation_name_gp = gpar(fontsize = 9, fontface = "bold")
)

# right annotation
right_ha <- rowAnnotation(
  `Proportion\nexpressing\ncells` = anno_barplot(
    row_prop,
    gp = gpar(fill = "grey60", col = "black"),
    border = FALSE,
    width = unit(1.0, "cm"),
    ylim = c(0, max(row_prop, na.rm = TRUE)),
    axis_param = list(
      side = "bottom",
      gp = gpar(fontsize = 8)
    )
  ),
  annotation_name_side = "bottom",
  annotation_name_gp = gpar(fontsize = 9)
)

# left annoation
left_ha <- rowAnnotation(
    df = annotation_row,
    col = meta_colors,
    show_annotation_name = FALSE
)
# Heatmap 
col_fun <- circlize::colorRamp2(
  breaks = c(-2, 0, 2),
  hcl_palette = "PuOr",
  reverse = TRUE
)

Heatmap(
  exp_ave_scale,
  name = "Z-score",
  
  col = col_fun,
  
  cluster_rows = as.dendrogram(row_hclust),
  row_split = 5,                 
  cluster_row_slices = FALSE,
  
  cluster_columns = TRUE,
  column_split = ,
  cluster_column_slices = FALSE,
  
  row_title = NULL,
  column_title = NULL,

  top_annotation = top_ha,
  left_annotation = left_ha,
  right_annotation = right_ha,
  
  show_row_names = TRUE,
  show_column_names = TRUE,
  row_names_side = "right",
  row_names_gp = gpar(fontsize = 11, fontface = "italic"),
  column_names_gp = gpar(fontsize = 12),
  column_names_rot = -45,
  
  show_row_dend = FALSE,
  show_column_dend = FALSE,
  
  rect_gp = gpar(col = "grey", lwd = 0.8),
  
  heatmap_legend_param = list(
    title = "Z-score"
  )
)