To investigate whether complement components are spatially organized within RA synovium, we developed a spatial neighborhood-based modeling framework for gene-level associations to define complement cellular niches for spatial transcriptomics.
This tutorial outlines key steps of applying the spatial neighborhood framework to measure spatial relationships between complement signatures and canonical synovial structural markers in RA synovial.
To run this framework, the following inputs are required:
We provided a set of custom R functions for this pipeline. Before running the analysis, please load the required functions from ./Spatial_modeling_R_functions.R.
source("../R/Spatial_modeling_R_functions.R")
library(qs)
library(Seurat)
library(Matrix)
library(ComplexHeatmap)
library(RANN)
library(dplyr)
library(ggplot2)
library(circlize)
# Load object that includes multiple RA samples
Xenium_samples <- qread("../../Data/xenium_merged.qs")
# In this tutorial, we take RA331 (RA1) as an example
RA1_obj <- subset(Xenium_samples, subset = sample == "RA331")
# RA1 meta data
meta_RA1 <- RA1_obj@meta.data
meta_RA1$cell <- rownames(meta_RA1)
coord_df_RA1 <- data.frame(cell = meta_RA1$cell, x = meta_RA1$x, y = meta_RA1$y, check.names = FALSE)
coord_df_RA1[1:5,] %>%
DT::datatable(options = list(scrollx = T))
# Define the RA1 region of interest (ROI)
xlim <- c(4200, 8000)
ylim <- c(7.151, 6300)
# Select ROI cells
coord_df_RA1$selected <- with(coord_df_RA1, x >= xlim[1] & x <= xlim[2] & y >= ylim[1] & y <= ylim[2])
cells_seg <- coord_df_RA1$cell[coord_df_RA1$selected] # subset of cells
cat("N selected = ", length(cells_seg), "\n")
## N selected = 153815
# Visualize ROI
p_rect <- ggplot(coord_df_RA1, aes(y, x)) +
geom_point(aes(color = selected), size = 0.15, alpha = 0.8, stroke = 0) +
annotate("rect", xmin = ylim[1], xmax = ylim[2],
ymin = xlim[1], ymax = xlim[2],
fill = NA, color = "black", linewidth = 0.4) +
scale_color_manual(values = c("FALSE" = "grey80", "TRUE" = "darkslategray3")) +
coord_flip() +
coord_fixed() +
labs(
title = "RA1 ROI",
x = "y", y = "x"
) +
theme_bw(base_size = 11) +
theme(legend.position = "none")
p_rect
RA1_seg <- subset(RA1_obj, cells = cells_seg)
# Subest 1/5 cells for a quick render of this tutorial
set.seed(42)
n_cells <- round(ncol(RA1_seg) / 5)
sampled_cells <- sample(colnames(RA1_seg), size = n_cells)
RA1_seg_subset <- subset(RA1_seg, cells = sampled_cells)
# Meta data
df_meta_seg_sub <- meta_RA1[
rownames(meta_RA1) %in% colnames(RA1_seg_subset),
]
# Spatial coordinate x,y
coord_df_sub_RA1 <- data.frame(cell = rownames(df_meta_seg_sub), x = df_meta_seg_sub$x, y = df_meta_seg_sub$y, check.names = FALSE)
# Query genes: we use curated complement signatures, canonical synovial lining markers, canonical synovial sublining markers, and curated inflammatory and IL-6-associated signatures.
lining_genes <- c("MMP3", "MMP1","HBEGF","CD68","SPP1","GPNMB")
sublining_genes <- c("THY1", "CXCL12", "SFRP1","LYVE1","COL5A2")
complement_sublining <- c("C3", "COLEC12","CFB")
complement_lining <- c("C5AR1", "FCGR3A","FCGR1A")
inflammatory_signatures <- c("TNF", "CXCL2", "ATF3","CXCL3" ,"CLEC5A","LIF", "CXCL9","IL27","SOCS3")
gene_groups <- list(
lining_genes = lining_genes,
sublining_genes = sublining_genes,
complement_lining = complement_lining,
complement_sublining = complement_sublining,
inflammatory_signatures = inflammatory_signatures
)
# Extract normalized expression matrix(log-normalized counts)
RA1_expr <- GetAssayData(RA1_seg_subset, layer = "data")
RA1_seg_sub_expr <- t(as.matrix(RA1_expr)) # cell by gene
dim(RA1_seg_sub_expr)
## [1] 30763 480
RA1_seg_sub_expr[1:10,1:10] %>%
DT::datatable(options = list(scrollx = T))
# Construct expression matrix
query_genes <- preprocess_gene_expr(
expr_mat = RA1_seg_sub_expr,
gene_groups = gene_groups
)
# Intersect cells between expr and coords
cells_common <- intersect(rownames(RA1_seg_sub_expr), rownames(df_meta_seg_sub))
expr_mat_RA1 <- RA1_seg_sub_expr[cells_common, , drop = FALSE]
coords_RA1 <- df_meta_seg_sub[cells_common, c("x","y"), drop = FALSE]
# Subset expression to selected genes
X_all <- expr_mat_RA1[, query_genes, drop = FALSE]
X_all <- as.matrix(X_all)
dim(X_all) # cells x genes
## [1] 30763 25
To capture distance-decayed local influence, we used Gaussian kernel to weight the k-nearest spatial neighboring cells as a function of spatial distance (e.g., k = 20 nearest neighbors). The resulting weight matrix was used to summarize local spatial neighborhood signals.
# Gaussian kernel-weighted object
weights_obj <- GaussianKernel_weights(
coords_df = coords_RA1,
k = 20
)
# Extract 1) cell density quantifying regional variation
cell_density <- weights_obj$cell_density_z
# Extract 2) spatial neighborhood weights
W_mat <- weights_obj$W_sparse
# Define distance-weighted neighborhood exposure of cell
E_mat <- distance_weighted_neighborhoods_exposure(
X_all = X_all,
W_sparse = W_mat
)
dim(E_mat) # cells x genes
## [1] 30763 25
E_mat[1:5,1:5] %>%
DT::datatable(options = list(scrollx = T))
For each exposure gene, the direction-pair regression model tests whether its spatial neighborhood expression is associated with each outcome gene, adjusting for: 1) self expression of the exposure gene, 2) local cell density.
Sys.setenv(
OMP_NUM_THREADS = "1",
OPENBLAS_NUM_THREADS = "1",
MKL_NUM_THREADS = "1",
VECLIB_MAXIMUM_THREADS = "1",
NUMEXPR_NUM_THREADS = "1"
)
set.seed(42)
spatial_res_RA1 <- spatial_neighborhood_model_all(
X_matrix = X_all,
E_weighted_matrix = E_mat,
W_weights = W_mat,
cell_density = cell_density,
B = 1000, # generate empirical p-values
n_cores = 4,
seed = 123,
verbose = TRUE
)
# To account for multiple testing across all directional gene pairs, empirical p-values were adjusted using the Benjamini-Hochberg procedure, producing a false discovery rate (FDR) value for each gene pair.
spatial_res_RA1 <- spatial_association_fdr(spatial_res_RA1)
# Build gene-by-gene matrix: rows = outcome genes, cols = exposure genes
heat_mat_RA1 <- matrix(
0,
nrow = length(query_genes),
ncol = length(query_genes),
dimnames = list(query_genes, query_genes)
)
# Define signed colocalization significance score: C = sign(beta_hat)*[-log(FDR)]
for (i in seq_len(nrow(spatial_res_RA1))) {
g <- spatial_res_RA1$outcome_gene[i]
h <- spatial_res_RA1$exposure_gene[i]
heat_mat_RA1[g, h] <- spatial_res_RA1$signed_log10_fdr[i]
}
# Gene annotations
gene_annot <- data.frame(
gene = c(
intersect(complement_lining, query_genes),
intersect(lining_genes, query_genes),
intersect(complement_sublining, query_genes),
intersect(sublining_genes, query_genes),
intersect(inflammatory_signatures, query_genes)
),
group = c(
rep("Complement-lining", length(intersect(complement_lining, query_genes))),
rep("Lining", length(intersect(lining_genes, query_genes))),
rep("Complement-sublining", length(intersect(complement_sublining, query_genes))),
rep("Sublining", length(intersect(sublining_genes, query_genes))),
rep("Inflammatory and IL-6-associated", length(intersect(inflammatory_signatures, query_genes)))
),
stringsAsFactors = FALSE
)
gene_order <- gene_annot$gene
gene_group <- factor(
gene_annot$group,
levels = c(
"Complement-lining",
"Lining",
"Complement-sublining",
"Sublining",
"Inflammatory and IL-6-associated"
)
)
heat_mat_RA1 <- heat_mat_RA1[gene_order, gene_order, drop = FALSE]
# Heatmap of signed colocalization significance score
group_cols <- c(
"Complement-lining" = "#f4a582",
"Lining" = "#d95f02",
"Complement-sublining" = "#7570b3",
"Sublining" = "#53308F",
"Inflammatory and IL-6-associated" = "#f2e7b5"
)
top_ha <- HeatmapAnnotation(
Group = gene_group,
col = list(Group = group_cols),
show_annotation_name = FALSE,
show_legend = FALSE
)
left_ha <- rowAnnotation(
Group = gene_group,
col = list(Group = group_cols),
show_annotation_name = FALSE
)
lim <- max(abs(heat_mat_RA1), na.rm = TRUE)
col_fun <- colorRamp2(
c(-lim, 0, lim),
c("#92c5de", "#f5f5dc", "orange")
)
Heatmap(
heat_mat_RA1,
name = "signed\n-log10(FDR)",
col = col_fun,
cluster_rows = TRUE,
row_split = 2,
cluster_row_slices = TRUE,
cluster_columns = TRUE,
column_split = 2,
cluster_column_slices = FALSE,
top_annotation = top_ha,
left_annotation = left_ha,
row_names_side = "right",
show_row_dend = FALSE,
show_column_dend = FALSE,
row_title = NULL,
column_names_rot = -45,
column_title = "RA1 spatial colocalization analysis",
rect_gp = gpar(col = "lightgrey", lwd = 0.5),
row_names_gp = gpar(fontsize = 14,fontface = "italic",fontfamily = "sans"),
column_names_gp = gpar(fontsize = 14, fontfamily = "sans")
)
# Build discrete significance matrix
sig_mat_RA1 <- matrix(
"NS",
nrow = nrow(heat_mat_RA1),
ncol = ncol(heat_mat_RA1),
dimnames = dimnames(heat_mat_RA1)
)
sig_mat_RA1[heat_mat_RA1 > -log10(0.05)] <- "Pos"
sig_mat_RA1[heat_mat_RA1 < -log10(0.05)] <- "Neg"
sig_mat_RA1[heat_mat_RA1 == -log10(0.05) | is.na(heat_mat_RA1)] <- "NS"
diag(sig_mat_RA1) <- "diag"
sig_cols <- c(
Neg = "#2166AC",
NS = "grey",
Pos = "#B2182B",
diag = "white"
)
mat_for_cluster <- heat_mat_RA1
mat_for_cluster[is.na(mat_for_cluster)] <- 0
row_hclust <- hclust(dist(mat_for_cluster))
col_hclust <- hclust(dist(t(mat_for_cluster)))
# cluster block
row_split <- factor(
cutree(row_hclust, k = 2),
levels = sort(unique(cutree(row_hclust, k = 2)))
)
col_split <- factor(
cutree(col_hclust, k = 2),
levels = sort(unique(cutree(col_hclust, k = 2)))
)
Heatmap(
sig_mat_RA1,
name = "sig",
col = sig_cols,
cluster_rows = as.dendrogram(row_hclust),
row_split = 2,
cluster_row_slices = FALSE,
cluster_columns = as.dendrogram(col_hclust),
column_split = 2,
cluster_column_slices = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
row_title = NULL,
top_annotation = top_ha,
left_annotation = left_ha,
row_names_side = "right",
column_names_rot = 45,
rect_gp = gpar(col = "lightgrey", lwd = 0.5),
row_names_gp = gpar(
fontsize = 12,
fontface = "italic",
fontfamily = "sans"
),
column_title = "RA1 spatial colocalization analysis (significance)",
column_names_gp = gpar(
fontsize = 12,
fontfamily = "sans"
)
)
To define compartment-specific complement-associated niches at the cell level, we constructed regression-weighted niche scores directly from the direction-pair regression results. We identify two sharply segregated complement niches quantified the degree to which cells were embedded within a local complement-lining or -sublining neighborhood.
# Specifically, we focused on significantly positively colocalized gene pairs: fdr < 0.05 and beta > 0
cl_pairs <- subset(
spatial_res_RA1,
outcome_gene %in% lining_genes &
exposure_gene %in% complement_lining &
beta_exposure > 0 &
fdr < 0.05
)
cs_pairs <- subset(
spatial_res_RA1,
outcome_gene %in% sublining_genes &
exposure_gene %in% complement_sublining &
beta_exposure > 0 &
fdr < 0.05
)
plot_df <- data.frame(
cell = rownames(coords_RA1),
x = coords_RA1[, "x"],
y = coords_RA1[, "y"],
stringsAsFactors = FALSE
)
for (g in colnames(E_mat)) {
plot_df[[paste0("exp_", g)]] <- E_mat[, g]
}
plot_df$y_plot <- -plot_df$y
# Compute weighted module score from significant positive pairs
# score_i = sum_{pairs} beta_exposure * E_i,h
plot_df$CL_module_weighted <- compute_weighted_module_score(cl_pairs, E_mat)
plot_df$CS_module_weighted <- compute_weighted_module_score(cs_pairs, E_mat)
plot_df$CL_minus_CS_weighted <- plot_df$CL_module_weighted - plot_df$CS_module_weighted
p_cl_weighted <- plot_spatial_feature(
plot_df, "CL_module_weighted", "RA1 - Complement-lining niche score"
)
p_cs_weighted <- plot_spatial_feature(
plot_df, "CS_module_weighted", "RA1 - Complement-sublining niche score"
)
p_cl_weighted + p_cs_weighted
## Concordance with unbiased spatial niche clusters adapted from Mantel et al.
cell_match <- match(plot_df$cell, meta_RA1$cell)
meta_RA1 <- meta_RA1[cell_match[!is.na(cell_match)], ]
df_unbiased_label <- meta_RA1 %>% select("cell","x","y","Niche")
# remove duplicated cells
sum(duplicated(df_unbiased_label$cell))
## [1] 0
sum(duplicated(plot_df$cell))
## [1] 0
df_concord <- df_unbiased_label %>%
inner_join(plot_df, by = "cell")
# Focus on the unbiased synovial Lining (Niche3) and Sublining (Niche4) niche
df_concord_sub <- df_concord[df_concord$Niche %in% c("Niche3", "Niche4"), ]
df_concord_sub <- droplevels(
df_concord[df_concord$Niche %in% c("Niche3", "Niche4"), ]
)
# Statistical test and compute the correlation between our regression-weighted niche scores and unbiased niche labels
wilcox.test(CL_module_weighted ~ Niche, data = df_concord_sub)
##
## Wilcoxon rank sum test with continuity correction
##
## data: CL_module_weighted by Niche
## W = 103763995, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
df_concord_sub$Niche_binary <- ifelse(df_concord_sub$Niche == "Niche3", 1, 0)
res <- cor.test(
df_concord_sub$Niche_binary,
df_concord_sub$CL_module_weighted,
method = "pearson"
)
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.3.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Denver
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] circlize_0.4.17 ggplot2_4.0.2 dplyr_1.2.0
## [4] RANN_2.6.2 ComplexHeatmap_2.20.0 Matrix_1.7-5
## [7] Seurat_5.4.0 SeuratObject_5.3.0 sp_2.2-1
## [10] qs_0.27.3 BiocStyle_2.32.1
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 shape_1.4.6.1 rstudioapi_0.18.0
## [4] jsonlite_2.0.0 magrittr_2.0.4 magick_2.9.1
## [7] spatstat.utils_3.2-2 farver_2.1.2 rmarkdown_2.30
## [10] GlobalOptions_0.1.3 vctrs_0.7.2 ROCR_1.0-12
## [13] Cairo_1.7-0 spatstat.explore_3.8-0 tinytex_0.58
## [16] htmltools_0.5.9 sass_0.4.10 sctransform_0.4.3
## [19] parallelly_1.46.1 KernSmooth_2.23-26 bslib_0.10.0
## [22] htmlwidgets_1.6.4 ica_1.0-3 plyr_1.8.9
## [25] plotly_4.12.0 zoo_1.8-15 cachem_1.1.0
## [28] igraph_2.2.2 mime_0.13 lifecycle_1.0.5
## [31] iterators_1.0.14 pkgconfig_2.0.3 R6_2.6.1
## [34] fastmap_1.2.0 clue_0.3-67 fitdistrplus_1.2-6
## [37] future_1.70.0 shiny_1.13.0 digest_0.6.39
## [40] colorspace_2.1-2 S4Vectors_0.42.1 patchwork_1.3.2
## [43] tensor_1.5.1 RSpectra_0.16-2 irlba_2.3.7
## [46] crosstalk_1.2.2 labeling_0.4.3 progressr_0.18.0
## [49] spatstat.sparse_3.1-0 httr_1.4.8 polyclip_1.10-7
## [52] abind_1.4-8 compiler_4.4.1 withr_3.0.2
## [55] doParallel_1.0.17 S7_0.2.1 fastDummies_1.7.5
## [58] MASS_7.3-65 rjson_0.2.23 tools_4.4.1
## [61] lmtest_0.9-40 otel_0.2.0 httpuv_1.6.17
## [64] future.apply_1.20.2 goftest_1.2-3 glue_1.8.0
## [67] nlme_3.1-168 promises_1.5.0 Rtsne_0.17
## [70] cluster_2.1.8.2 reshape2_1.4.5 generics_0.1.4
## [73] gtable_0.3.6 spatstat.data_3.1-9 tidyr_1.3.2
## [76] data.table_1.18.2.1 RApiSerialize_0.1.4 stringfish_0.18.0
## [79] BiocGenerics_0.50.0 spatstat.geom_3.7-2 RcppAnnoy_0.0.23
## [82] ggrepel_0.9.8 foreach_1.5.2 pillar_1.11.1
## [85] stringr_1.6.0 spam_2.11-3 RcppHNSW_0.6.0
## [88] later_1.4.8 splines_4.4.1 lattice_0.22-9
## [91] survival_3.8-6 deldir_2.0-4 tidyselect_1.2.1
## [94] miniUI_0.1.2 pbapply_1.7-4 knitr_1.51
## [97] gridExtra_2.3 bookdown_0.46 IRanges_2.38.1
## [100] scattermore_1.2 stats4_4.4.1 xfun_0.57
## [103] matrixStats_1.5.0 DT_0.34.0 stringi_1.8.7
## [106] lazyeval_0.2.2 yaml_2.3.12 evaluate_1.0.5
## [109] codetools_0.2-20 tibble_3.3.1 BiocManager_1.30.27
## [112] cli_3.6.5 uwot_0.2.4 RcppParallel_5.1.11-2
## [115] xtable_1.8-8 reticulate_1.45.0 jquerylib_0.1.4
## [118] Rcpp_1.1.1 globals_0.19.1 spatstat.random_3.4-5
## [121] png_0.1-9 spatstat.univar_3.1-7 parallel_4.4.1
## [124] dotCall64_1.2 listenv_0.10.1 viridisLite_0.4.3
## [127] scales_1.4.0 ggridges_0.5.7 purrr_1.2.1
## [130] crayon_1.5.3 GetoptLong_1.1.0 rlang_1.1.7
## [133] cowplot_1.2.0