library(scLASER)
library(dplyr)
library(tidyr)
library(ggplot2)

1. Simulate a scenario that captures the longitudinal evolution of interacting cell types across visits and their divergence between groups.

Longitudinal changes in cell-type abundance are specified using effect design matrices that explicitly encode how each cell type evolves across visits and differs between groups.

In this framework:

Two matrices are provided:


2. Simulate a two-timepoint case consisting of baseline (V0) and follow-up (V1).

cell_types  <- LETTERS[1:(7 + 3)]  # A-J
time_points <- 2                   # V0, V1

E_prog <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_ctrl <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_prog["A", ] <- c(0.00, +0.30)
E_prog["B", ] <- c(0.00, -0.30)
E_prog["I", ] <- c(0.00, +0.30)
E_prog["J", ] <- c(0.00, -0.30)

set.seed(2020)
d2 <- generate_dummy_data(
  n_cells = 150,
  sd_celltypes = 0.1,
  n_major_cell_types = 7,
  n_minor_cell_types = 3,
  relative_abundance = 0.4,
  n_major_interact_celltypes = 2,
  n_minor_interact_celltypes = 2,
  n_individuals = 30,
  n_batchs = 4,
  interaction_feature = "visit",
  time_points = 2,
  test_var = "disease",
  prop_disease = 0.5,
  seed = 2020,
  effect_mat_progressor = E_prog,
  effect_mat_control    = E_ctrl
)

obj2 <- scLASER(metadata = d2)
plot_celltype_proportions(
  obj2,
  disease_var = "disease",
  disease_labels = c("0" = "Non-responder", "1" = "Responder"),
  disease_colors = c("Non-responder" = "#4DAF4A", "Responder" = "#984EA3"), highlight_cell_types = NULL
)


E_prog
#>   V0   V1
#> A  0  0.3
#> B  0 -0.3
#> C  0  0.0
#> D  0  0.0
#> E  0  0.0
#> F  0  0.0
#> G  0  0.0
#> H  0  0.0
#> I  0  0.3
#> J  0 -0.3

E_ctrl
#>   V0 V1
#> A  0  0
#> B  0  0
#> C  0  0
#> D  0  0
#> E  0  0
#> F  0  0
#> G  0  0
#> H  0  0
#> I  0  0
#> J  0  0

3. Simulate a three-timepoint case (V0, V1, V2)

time_points <- 3

E_prog <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_ctrl <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_prog["A", ] <- c(0.00, 0.30, 0.60)
E_prog["B", ] <- c(0.00, -0.30, -0.60)
E_prog["I", ] <- c(0.00, 0.30, 0.60)
E_prog["J", ] <- c(0.00, -0.30, -0.60)

d3 <- generate_dummy_data(
  n_cells = 150,
  sd_celltypes = 0.1,
  n_major_cell_types = 7,
  n_minor_cell_types = 3,
  relative_abundance = 0.4,
  n_major_interact_celltypes = 2,
  n_minor_interact_celltypes = 2,
  n_individuals = 30,
  n_batchs = 4,
  interaction_feature = "visit",
  time_points = time_points,
  test_var = "disease",
  prop_disease = 0.5,
  seed = 2020,
  effect_mat_progressor = E_prog,
  effect_mat_control    = E_ctrl
)

obj3 <- scLASER(metadata = d3)
plot_celltype_proportions(
  obj3,
  disease_var = "disease",
  disease_labels = c("0" = "Non-responder", "1" = "Responder"),
  disease_colors = c("Non-responder" = "#4DAF4A", "Responder" = "#984EA3"), highlight_cell_types = NULL
)

E_prog
#>   V0   V1   V2
#> A  0  0.3  0.6
#> B  0 -0.3 -0.6
#> C  0  0.0  0.0
#> D  0  0.0  0.0
#> E  0  0.0  0.0
#> F  0  0.0  0.0
#> G  0  0.0  0.0
#> H  0  0.0  0.0
#> I  0  0.3  0.6
#> J  0 -0.3 -0.6

E_ctrl
#>   V0 V1 V2
#> A  0  0  0
#> B  0  0  0
#> C  0  0  0
#> D  0  0  0
#> E  0  0  0
#> F  0  0  0
#> G  0  0  0
#> H  0  0  0
#> I  0  0  0
#> J  0  0  0

4. Simulate a five-timepoint case

time_points <- 5
traj <- c(0.00, 0.30, 0.60, 0.20, 0.40)

E_prog <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_ctrl <- matrix(
  0, nrow = length(cell_types), ncol = time_points,
  dimnames = list(cell_types, paste0("V", 0:(time_points - 1)))
)

E_prog[c("A","I"), ] <- matrix(traj,  nrow = 2, ncol = time_points, byrow = TRUE)
E_prog[c("B","J"), ] <- matrix(-traj, nrow = 2, ncol = time_points, byrow = TRUE)

d5 <- generate_dummy_data(
  n_cells = 150,
  sd_celltypes = 0.1,
  n_major_cell_types = 7,
  n_minor_cell_types = 3,
  relative_abundance = 0.4,
  n_major_interact_celltypes = 2,
  n_minor_interact_celltypes = 2,
  n_individuals = 30,
  n_batchs = 4,
  interaction_feature = "visit",
  time_points = time_points,
  test_var = "disease",
  prop_disease = 0.5,
  seed = 2020,
  effect_mat_progressor = E_prog,
  effect_mat_control    = E_ctrl
)

obj5 <- scLASER(metadata = d5)

E_prog
#>   V0   V1   V2   V3   V4
#> A  0  0.3  0.6  0.2  0.4
#> B  0 -0.3 -0.6 -0.2 -0.4
#> C  0  0.0  0.0  0.0  0.0
#> D  0  0.0  0.0  0.0  0.0
#> E  0  0.0  0.0  0.0  0.0
#> F  0  0.0  0.0  0.0  0.0
#> G  0  0.0  0.0  0.0  0.0
#> H  0  0.0  0.0  0.0  0.0
#> I  0  0.3  0.6  0.2  0.4
#> J  0 -0.3 -0.6 -0.2 -0.4

E_ctrl
#>   V0 V1 V2 V3 V4
#> A  0  0  0  0  0
#> B  0  0  0  0  0
#> C  0  0  0  0  0
#> D  0  0  0  0  0
#> E  0  0  0  0  0
#> F  0  0  0  0  0
#> G  0  0  0  0  0
#> H  0  0  0  0  0
#> I  0  0  0  0  0
#> J  0  0  0  0  0

plot_celltype_proportions(
  obj5,
  disease_var = "disease",
  disease_labels = c("0" = "Non-responder", "1" = "Responder"),
  disease_colors = c("Non-responder" = "#4DAF4A", "Responder" = "#984EA3"), highlight_cell_types = NULL)


5. (Optional) Generate pseudo-PCs

pcs_matrix <- generate_pseudo_pcs_time(
  d3,
  n_pcs = 20,
  cluster_pcs = 1:20,
  disease_pcs = 0,
  sex_pcs = 0,
  age_pcs = 0,
  bmi_pcs = 0,
  batch_pcs = 0,
  interaction_pcs = 0,
  visit_pcs = 0,
  subject_pcs = 0,
  cluster_ratio = 0.8,
  disease_ratio = 0,
  sex_ratio = 0,
  age_ratio = 0,
  bmi_ratio = 0,
  batch_ratio = 0,
  interaction_ratio = 0,
  visit_ratio = 0,
  subject_ratio = 0,
  scale_factor = 5,
  cluster_col = "cell_type",
  sex_col = "sex",
  age_col = "age",
  bmi_col = "bmi",
  batch_col = "batch",
  disease_col = "disease",
  visit_col = "visit",
  seed = 1234
)
dim(pcs_matrix)
#> [1] 110389     20