library(tidyverse)
library(edgeR)
library(vsn)
library(matrixStats)
library(viridisLite)
gene.exp.count = readRDS('/Volumes/T7/genomics/complement_amp/count_bulk_rna_4-23-2024_v2.rds')
meta_all = readRDS("/Volumes/T7/genomics/complement_amp/meta_bulk_rna_4-23-2024_v2.rds")
# normalize to log(TPM+1)
gene.exp.norm.tpm <- t(t(gene.exp.count) / colSums(gene.exp.count) * 1e6)
colSums(gene.exp.norm.tpm)
## 100nM_C3a_1_S69_L007 100nM_C3a_1_S69_L008
## 1e+06 1e+06
## 100nM_C3a_2_S70_L007 100nM_C3a_2_S70_L008
## 1e+06 1e+06
## 100nM_C3a_3_S71_L007 100nM_C3a_3_S71_L008
## 1e+06 1e+06
## 100nM_C3a_4_S72_L007 100nM_C3a_4_S72_L008
## 1e+06 1e+06
## 100nM_C5a_1_S73_L007 100nM_C5a_1_S73_L008
## 1e+06 1e+06
## 100nM_C5a_2_S74_L007 100nM_C5a_2_S74_L008
## 1e+06 1e+06
## 100nM_C5a_3_S75_L007 100nM_C5a_3_S75_L008
## 1e+06 1e+06
## 100nM_C5a_4_S76_L007 100nM_C5a_4_S76_L008
## 1e+06 1e+06
## No_tx_1_S81_L007 No_tx_1_S81_L008
## 1e+06 1e+06
## No_tx_2_S82_L007 No_tx_2_S82_L008
## 1e+06 1e+06
## No_tx_3_S83_L007 No_tx_3_S83_L008
## 1e+06 1e+06
## No_tx_4_S84_L007 No_tx_4_S84_L008
## 1e+06 1e+06
## SF_1_S49_L007 SF_1_S49_L008
## 1e+06 1e+06
## SF_2_S50_L007 SF_2_S50_L008
## 1e+06 1e+06
## SF_3_S51_L007 SF_3_S51_L008
## 1e+06 1e+06
## SF_4_S52_L007 SF_4_S52_L008
## 1e+06 1e+06
## SF_T___100nM_C3a_1_S61_L007 SF_T___100nM_C3a_1_S61_L008
## 1e+06 1e+06
## SF_T___100nM_C3a_2_S62_L007 SF_T___100nM_C3a_2_S62_L008
## 1e+06 1e+06
## SF_T___100nM_C3a_3_S63_L007 SF_T___100nM_C3a_3_S63_L008
## 1e+06 1e+06
## SF_T___100nM_C3a_4_S64_L007 SF_T___100nM_C3a_4_S64_L008
## 1e+06 1e+06
## SF_T___100nM_C5a_1_S65_L007 SF_T___100nM_C5a_1_S65_L008
## 1e+06 1e+06
## SF_T___100nM_C5a_2_S66_L007 SF_T___100nM_C5a_2_S66_L008
## 1e+06 1e+06
## SF_T___100nM_C5a_3_S67_L007 SF_T___100nM_C5a_3_S67_L008
## 1e+06 1e+06
## SF_T___100nM_C5a_4_S68_L007 SF_T___100nM_C5a_4_S68_L008
## 1e+06 1e+06
## SF_T_1_S53_L007 SF_T_1_S53_L008
## 1e+06 1e+06
## SF_T_2_S54_L007 SF_T_2_S54_L008
## 1e+06 1e+06
## SF_T_3_S55_L007 SF_T_3_S55_L008
## 1e+06 1e+06
## SF_T_4_S56_L007 SF_T_4_S56_L008
## 1e+06 1e+06
tpm_to_log2tpm <- function(x) log(x + 1, base = 2)
log2tpm <- apply(as.data.frame(gene.exp.norm.tpm), 2, tpm_to_log2tpm)
log2tpm[1:4,1:4]
## 100nM_C3a_1_S69_L007 100nM_C3a_1_S69_L008 100nM_C3a_2_S70_L007
## A2MP1 0 0.0000000 0.000000
## A3GALT2 0 0.0000000 0.000000
## A4GNT 0 0.0000000 1.395835
## AACSP1 0 0.1768183 0.000000
## 100nM_C3a_2_S70_L008
## A2MP1 0.9861742
## A3GALT2 0.0000000
## A4GNT 1.1770817
## AACSP1 0.1892010
# Which genes are detected in 95% of samples? (# of non-zeros in log2tpm/ total # of samples)
nonzero_markers <- apply(log2tpm, 1, function(x) length(x[x>0])/length(x) > 0.95)
nonzero_genes <- log2tpm[nonzero_markers,]
common_genes <- rownames(nonzero_genes)
length(common_genes)
## [1] 2664
# For a given sample, how many common genes are detected? (# of non-zeros in the common genes per sample/ total # of genes)
# If < 95%, that would be a BAD sample.
temp<-which(log2tpm[,1]>0)
common_index<-which(nonzero_markers, arr.ind=TRUE)
percentage <- array(0,dim=c(ncol(log2tpm)))
num_genes_det <- array(0,dim=c(ncol(log2tpm)))
for (i in 1:ncol(log2tpm)){
temp<-which(log2tpm[,i]>0)
num_genes_det[i] <- length(temp)
inter<- intersect(common_index, temp)
per<-length(inter)/length(common_genes)
percentage[i] <- per
}
meta <- data.frame(
sample = colnames(log2tpm)
)
meta$percentage <- as.numeric(percentage)
meta[1:4,]
## sample percentage
## 1 100nM_C3a_1_S69_L007 0.9924925
## 2 100nM_C3a_1_S69_L008 1.0000000
## 3 100nM_C3a_2_S70_L007 0.9962462
## 4 100nM_C3a_2_S70_L008 1.0000000
bad_index <- meta[which(meta$percentage < 0.95), ]$sample
bad_index
## character(0)
options(repr.plot.width = 8, repr.plot.height = 4)
ggplot() +
geom_point(
data = meta[order(meta$percentage, decreasing = T),],
mapping = aes(sample, percentage),
shape = 21, size = 3, stroke = 0.1
) +
geom_hline(yintercept = 0.95, color = "blue", linetype = "dashed") +
# scale_fill_manual(values = c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072"), name = "") +
labs(
x = "samples", y = "Percent of common gene detected"
) +
scale_y_continuous(labels=scales::percent) +
theme_classic() +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1))
ggplot(meta, aes(x = 100 * percentage)) +
geom_histogram(bins = 100) +
geom_vline(xintercept = 95, color = "blue", linetype = "dashed") +
scale_y_continuous(breaks = scales::pretty_breaks(4)) +
# coord_cartesian(xlim=c(80,100)) +
labs(
x = "Percent", y = "# Samples", # title = "Percent of Common Genes",
subtitle = sprintf(
"%s common genes detected in\n %s (95%%) of samples",
formattable::comma(length(common_genes)), ceiling(nrow(meta) * 0.95), nrow(meta)
)
) +
theme_classic() +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1))
# ggsave("qc2.png", width = 5, height = 3, dpi = 300)
Get location and gc content for the genes
library(BSgenome.Hsapiens.UCSC.hg38)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
gene_coords <- AnnotationDbi::select(txdb,
keys = keys(txdb, keytype = "TXNAME"),
columns = c("TXSTART", "TXEND", "GENEID", "TXCHROM"),
keytype = "TXNAME")
gene_coords$TXNAME_no_version <- sub("\\..*", "", gene_coords$TXNAME)
mapping <- AnnotationDbi::select(org.Hs.eg.db,
keys = keys(org.Hs.eg.db, keytype = "ENSEMBL"),
columns = c("SYMBOL", "ENSEMBLTRANS","ENTREZID"),
keytype = "ENSEMBL")
mapping$ENSEMBLTRANS_no_version <- sub("\\..*", "", mapping$ENSEMBLTRANS)
merged_data <- merge(gene_coords, mapping, by.x = "TXNAME_no_version", by.y = "ENSEMBLTRANS_no_version", all.x = TRUE)
merged_data = merged_data %>%
dplyr::select(-c(ENSEMBL,ENSEMBLTRANS))
filtered_data <- merged_data %>%
filter(!is.na(SYMBOL) & SYMBOL != "") %>%
distinct(SYMBOL, .keep_all = TRUE)
calcCGContent <- function(sequence) {
cg_count <- sum(toupper(charToRaw(sequence)) %in% c(charToRaw('C'), charToRaw('G')))
total_count <- nchar(sequence)
return(cg_count / total_count)
}
library(Rcpp)
cppFunction("
double calcCGContent(std::string sequence) {
int cg_count = 0;
int total_count = sequence.size(); // Total number of characters in the sequence
for (int i = 0; i < total_count; ++i) {
char nucleotide = toupper(sequence[i]);
if (nucleotide == 'C' || nucleotide == 'G') {
cg_count++;
}
}
return total_count > 0 ? (double)cg_count / total_count : 0;
}
")
calcCGContent <- function(sequence) {
cg_count <- sum(toupper(charToRaw(sequence)) %in% c(charToRaw('C'), charToRaw('G')))
total_count <- nchar(sequence)
return(cg_count / total_count)
}
plan(multisession, workers = 4)
results <- future_sapply(seq_along(filtered_data$TXSTART), function(i) {
chrom <- filtered_data$TXCHROM[i]
start <- filtered_data$TXSTART[i]
end <- filtered_data$TXEND[i]
if (!is.na(chrom) && !is.na(start) && !is.na(end)) {
seq <- as.character(getSeq(BSgenome.Hsapiens.UCSC.hg38, names = chrom, start = start, end = end))
return(calcCGContent(seq))
} else {
return(NA)
}
}, simplify = "vector")
filtered_data$CGContent <- results
#saveRDS(filtered_data, "gene_metadata_location.rds")
# contains information about location TXCHROM , TXSTART , TXEND and CG content of each gene
filtered_data = readRDS("/Volumes/T7/genomics/complement_amp/gene_metadata_location.rds")
dim(filtered_data)
## [1] 13005 9
dim(gene.exp.count)
## [1] 12861 56
filtered_data$length = filtered_data$TXEND - filtered_data$TXSTART
head(filtered_data)
## TXNAME_no_version TXNAME GENEID TXCHROM TXSTART TXEND
## 1 ENST00000000412 ENST00000000412.8 4074 chr12 8940361 8949645
## 2 ENST00000001008 ENST00000001008.6 2288 chr12 2794970 2805423
## 3 ENST00000001146 ENST00000001146.7 56603 chr2 72129238 72147862
## 4 ENST00000004103 ENST00000004103.8 55365 chr7 150800769 150805118
## 5 ENST00000005178 ENST00000005178.6 5166 chr7 95583499 95596516
## 6 ENST00000005180 ENST00000005180.9 10344 chr7 75769538 75772211
## SYMBOL ENTREZID CGContent length
## 1 M6PR 4074 0.4208939 9284
## 2 FKBP4 2288 0.5153051 10453
## 3 CYP26B1 56603 0.5984966 18624
## 4 TMEM176A 55365 0.5604598 4349
## 5 PDK4 5166 0.3608081 13017
## 6 CCL26 10344 0.4925206 2673
#Gene filtering
gene.exp.count_gc = gene.exp.count
gene.exp.count_gc = gene.exp.count_gc[rowSums(gene.exp.count_gc) > 10,]
#gene.exp.count_gc= gene.exp.count[rownames(gene.exp.count) %in% filtered_data$SYMBOL,]
filtered_data_lastver = filtered_data[filtered_data$SYMBOL %in% rownames(gene.exp.count_gc),]
dim(filtered_data_lastver)
## [1] 5838 10
dim(gene.exp.count_gc)
## [1] 5863 56
gene.exp.count_gc = gene.exp.count_gc[filtered_data_lastver$SYMBOL,] # Now they are in the same order
library(edgeR)
library(EDASeq)
rownames(filtered_data_lastver) = filtered_data_lastver$SYMBOL
filtered_data_lastver <- filtered_data_lastver[rownames(gene.exp.count_gc), ]
pheno_data <- data.frame(
conditions = factor(meta_all$group),
lane = meta_all$lane,
replicate = meta_all$replicate,
row.names = colnames(gene.exp.count_gc)
)
pheno_annotated <- AnnotatedDataFrame(data = pheno_data)
# Setup feature data, ensuring alignment with the gene counts
feature_data <- data.frame(
gc = filtered_data_lastver$CGContent,
length = filtered_data_lastver$length,
row.names = rownames(gene.exp.count_gc)
)
counts <- newSeqExpressionSet(counts = gene.exp.count_gc,
phenoData = pheno_annotated,
featureData = AnnotatedDataFrame(data = feature_data))
meanVarPlot(counts, log=TRUE)
meanVarPlot(counts)
upperdataWithin <- withinLaneNormalization(counts,"gc", which="upper")
norms = betweenLaneNormalization(upperdataWithin, which="median")
genes_exclude <- grep("^(MT|RPL|RPS|MALAT1|MIR-)", row.names(norms@assayData$normalizedCounts), value = TRUE)
meta_all$group <- ifelse(meta_all$group %in% c("100nM_C3a", "100nM_C5a"),
paste0("a",meta_all$group ),
meta_all$group)
cond <- factor(meta_all$group)
lane = meta_all$lane
replicate = meta_all$replicate
# remove batch effect
design <- model.matrix(~ 0 + cond + lane + replicate ) # Appropriate way to adjust for batch (4.5.7 edgeR manual)
colnames(design) = gsub("cond","",colnames(design))
contrast <- makeContrasts(
a100nM_C5a_vs_No_tx = a100nM_C5a - No_tx,
SF_T___100nM_C5a_vs_No_tx = SF_T___100nM_C5a - No_tx,
levels = design
)
temp <- norms@assayData$normalizedCounts[!rownames(norms@assayData$normalizedCounts) %in% genes_exclude,]
replicate = meta_all$replicate
# group <- factor(meta_all$group)
lanes = meta_all$lane
y <- DGEList(counts=temp, group=cond)
#y <- DGEList(counts=gene.exp.count_gc, group=meta_all$group)
y <- calcNormFactors(y, method = "TMM")
y <- estimateDisp(y, design)
# fit <- glmQLFit(y, design) #Quassi
# plotBCV(y)
fit <- glmFit(y, design)
colnames(design)
## [1] "a100nM_C3a" "a100nM_C5a" "No_tx" "SF"
## [5] "SF_T" "SF_T___100nM_C3a" "SF_T___100nM_C5a" "laneL008"
## [9] "replicate2" "replicate3" "replicate4"
contrast_list = list()
for (i in colnames(contrast)){
contrast_list[[i]] = glmLRT(fit, contrast=contrast[, i])
}
#contrast_list = readRDS("./contrast_list.rds")
length(contrast_list)
## [1] 2
juan = contrast_list
all_output_juan= data.frame(logfc = numeric(), pval = numeric(), pval_adj = numeric())
for (i in names(juan)){
out = data.frame(
Gene = rownames(juan[[i]]$table),
condition = i,
logfc = juan[[i]]$table$logFC,
pval =juan[[i]]$table$PValue,
pval_adj = p.adjust(juan[[i]]$table$PValue, method = "BH"))
all_output_juan = rbind(all_output_juan,out)
}
saveRDS(all_output_juan, "DEG_res.rds")
library(stringr)
library(tidyverse)
FC <- 2 # Fold change threshold
anno_degs_num <- 10 # Number of genes to annotate
tmp <- all_output_juan
conditions_to_print <- c(
"a100nM_C5a_vs_No_tx",
"SF_T___100nM_C5a_vs_No_tx"
)
unique_conditions <- unique(tmp$condition)
unique_conditions <- unique_conditions[unique_conditions %in% conditions_to_print]
for (conditions in unique_conditions) {
condition1 <- str_split(conditions, pattern = "_vs_", simplify = TRUE)[, 1]
condition2 <- str_split(conditions, pattern = "_vs_", simplify = TRUE)[, 2]
current_data <- tmp[tmp$condition == conditions, ]
vol <- ggplot(current_data, aes(x = logfc, y = -log10(pval_adj)))
sig_up <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj < 0.05, ]
sig_down <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj < 0.05, ]
sig_up2 <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj > 0.05, ]
sig_down2 <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj > 0.05, ]
sig_up3 <- current_data[current_data$logfc > 0 & current_data$pval_adj < 0.05, ]
sig_down3 <- current_data[current_data$logfc < 0 & current_data$pval_adj < 0.05, ]
insig <- current_data[
current_data$logfc < log2(FC) &
current_data$logfc > -log2(FC) &
current_data$pval_adj > 0.05,
]
sig_up <- sig_up[order(sig_up$pval_adj), ]
sig_down <- sig_down[order(sig_down$pval_adj), ]
size <- 2
g <- vol +
ggtitle(label = conditions) +
geom_point(data = sig_up, col = "red", alpha = 1, size = 1) +
geom_point(data = sig_down, col = "blue", alpha = 1, size = 1) +
geom_point(data = sig_up2, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down2, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = sig_up3, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down3, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = insig, col = "grey", alpha = 0.5, size = 1) +
ggrepel::geom_text_repel(
data = sig_up,
aes(label = Gene),
max.overlaps = 25,
fontface = "italic",
size = size,
segment.color = "grey"
) +
ggrepel::geom_text_repel(
data = sig_down,
aes(label = Gene),
max.overlaps = 25,
fontface = "italic",
size = size,
segment.color = "grey"
) +
theme_bw(base_size = 14) +
theme(
legend.position = "right",
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15)
) +
xlab(bquote(log[2](.(condition1) / .(condition2)))) +
ylab(expression(-log[10]("Adjusted P-value"))) +
geom_hline(yintercept = -log10(0.05), colour = "#990000", linetype = "dashed") +
geom_vline(xintercept = log2(FC), colour = "#990000", linetype = "dashed") +
geom_vline(xintercept = -log2(FC), colour = "#990000", linetype = "dashed")
file_name <- paste0(gsub("[[:punct:]]", "_", conditions), "_deg_volcano.pdf")
# ggsave(filename = file_name, plot = g, width = 10, height = 8, dpi = 300)
print(g)
}
getwd()
## [1] "/Volumes/T7/genomics"
library(stringr)
library(tidyverse)
library(ggrepel)
FC <- 2
tmp <- all_output_juan
# Only plot this one condition
condition_use <- "SF_T___100nM_C5a_vs_No_tx"
current_data <- tmp[tmp$condition == condition_use, ]
# Optional check
dim(current_data)
## [1] 5341 5
unique(current_data$condition)
## [1] "SF_T___100nM_C5a_vs_No_tx"
sig_up <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj < 0.05, ]
sig_down <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj < 0.05, ]
sig_up2 <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj > 0.05, ]
sig_down2 <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj > 0.05, ]
sig_up3 <- current_data[current_data$logfc > 0 & current_data$pval_adj < 0.05, ]
sig_down3 <- current_data[current_data$logfc < 0 & current_data$pval_adj < 0.05, ]
insig <- current_data[
current_data$logfc < log2(FC) &
current_data$logfc > -log2(FC) &
current_data$pval_adj > 0.05,
]
sig_up <- sig_up[order(sig_up$pval_adj), ]
sig_down <- sig_down[order(sig_down$pval_adj), ]
size <- 4.2
# Only visualize selected genes
up <- c(
"C5AR2", "CCL18", "LYVE1", "SOCS3", "DUSP4", "G0S2",
"CXCL1", "CXCL2", "CXCL3", "CCL8", "CCL23", "CXCL5",
"CXCL10", "CCL18", "CXCR4", "IL27", "FABP4", "TNFRSF613",
"ENO2", "CNTF", "INHBE", "PF4", "GEM", "ITGB3",
"PPBP", "MMP12"
)
sig_up_label <- sig_up[sig_up$Gene %in% up, ]
down <- c(
"CFD", "PECAM1", "ABI3", "GAPT", "MARCKSL1", "GPR82",
"XYLT1", "NCF1C", "ATP6V0D2", "TMEM119", "EBI3", "FGL2"
)
sig_down_label <- sig_down[sig_down$Gene %in% down, ]
# Check labeled genes
dim(sig_up_label)
## [1] 22 5
dim(sig_down_label)
## [1] 12 5
options(repr.plot.height = 7, repr.plot.width = 9)
p = ggplot(current_data, aes(x = logfc, y = -log10(pval_adj))) +
geom_point(data = sig_up, col = "red", alpha = 1, size = 1) +
geom_point(data = sig_down, col = "blue", alpha = 1, size = 1) +
geom_point(data = sig_up_label, col = "red", alpha = 1, size = 2.5) +
geom_point(data = sig_down_label, col = "blue", alpha = 1, size = 2.5) +
geom_point(data = sig_up2, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down2, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = sig_up3, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down3, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = insig, col = "grey", alpha = 0.5, size = 1) +
geom_hline(
yintercept = -log10(0.05),
colour = "#990000",
linetype = "dashed"
) +
geom_vline(
xintercept = log2(FC),
colour = "#990000",
linetype = "dashed"
) +
geom_vline(
xintercept = -log2(FC),
colour = "#990000",
linetype = "dashed"
) +
ggrepel::geom_label_repel(
data = sig_up_label,
aes(label = Gene),
max.overlaps = 30,
fontface = "italic",
size = size,
segment.color = "grey",
box.padding = 0.5
) +
ggrepel::geom_label_repel(
data = sig_down_label,
aes(label = Gene),
max.overlaps = 30,
fontface = "italic",
size = size,
segment.color = "grey",
box.padding = 0.5
) +
theme_bw(base_size = 18) +
theme(
legend.position = "right",
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15)
) +
xlab(bquote(log[2](.("SF_T rC5a") / .("control")))) +
ylab(expression(-log[10]("Adjusted p")))
print(p)
#ggsave("volcano_c5.pdf", width = 9, height = 7, dpi = 300)
ggsave(
filename = "SF_T_100nM_C5a_vs_No_tx_volcano.png",
plot = p,
width = 9,
height = 7,
dpi = 300,
units = "in"
)
library(stringr)
library(tidyverse)
library(ggrepel)
FC <- 2
tmp <- all_output_juan
# Only plot this condition
condition_use <- "a100nM_C5a_vs_No_tx"
current_data <- tmp[tmp$condition == condition_use, ]
# Optional checks
dim(current_data)
## [1] 5341 5
unique(current_data$condition)
## [1] "a100nM_C5a_vs_No_tx"
sig_up <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj < 0.05, ]
sig_down <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj < 0.05, ]
sig_up2 <- current_data[current_data$logfc > log2(FC) & current_data$pval_adj > 0.05, ]
sig_down2 <- current_data[current_data$logfc < -log2(FC) & current_data$pval_adj > 0.05, ]
sig_up3 <- current_data[current_data$logfc > 0 & current_data$pval_adj < 0.05, ]
sig_down3 <- current_data[current_data$logfc < 0 & current_data$pval_adj < 0.05, ]
insig <- current_data[
current_data$logfc < log2(FC) &
current_data$logfc > -log2(FC) &
current_data$pval_adj > 0.05,
]
sig_up <- sig_up[order(sig_up$pval_adj), ]
sig_down <- sig_down[order(sig_down$pval_adj), ]
size <- 4.2
# Only visualize selected genes
up <- c(
"CXCR4", "CXCL3", "CXCL5", "CCL3", "CCL22", "DUSP5",
"SLC7A5", "SERPINE1", "CNIH3", "FABP4", "EREG",
"ICAM5", "DKK3", "PDCD1", "ITGB3", "GREM1",
"SLC28A3", "MMP12", "C2orf72", "TBX21", "NOTCH3"
)
sig_up_label <- sig_up[sig_up$Gene %in% up, ]
down <- c(
"MAF", "PECAM1", "IGFBP4", "CFD", "GAPT", "HSPA6",
"ABI3", "CFB", "CEBPD", "SELENOP", "CEACAM4"
)
sig_down_label <- sig_down[sig_down$Gene %in% down, ]
# Check labeled genes
dim(sig_up_label)
## [1] 20 5
dim(sig_down_label)
## [1] 10 5
options(repr.plot.height = 7, repr.plot.width = 9)
p = ggplot(current_data, aes(x = logfc, y = -log10(pval_adj))) +
geom_point(data = sig_up, col = "red", alpha = 1, size = 1) +
geom_point(data = sig_down, col = "blue", alpha = 1, size = 1) +
geom_point(data = sig_up_label, col = "red", alpha = 1, size = 2.5) +
geom_point(data = sig_down_label, col = "blue", alpha = 1, size = 2.5) +
geom_point(data = sig_up2, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down2, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = sig_up3, col = "pink", alpha = 0.5, size = 1) +
geom_point(data = sig_down3, col = "skyblue", alpha = 0.5, size = 1) +
geom_point(data = insig, col = "grey", alpha = 0.5, size = 1) +
geom_hline(
yintercept = -log10(0.05),
colour = "#990000",
linetype = "dashed"
) +
geom_vline(
xintercept = log2(FC),
colour = "#990000",
linetype = "dashed"
) +
geom_vline(
xintercept = -log2(FC),
colour = "#990000",
linetype = "dashed"
) +
ggrepel::geom_label_repel(
data = sig_up_label,
aes(label = Gene),
max.overlaps = 30,
fontface = "italic",
size = size,
segment.color = "grey",
box.padding = 0.5
) +
ggrepel::geom_label_repel(
data = sig_down_label,
aes(label = Gene),
max.overlaps = 30,
fontface = "italic",
size = size,
segment.color = "grey",
box.padding = 0.5
) +
theme_bw(base_size = 18) +
theme(
legend.position = "right",
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15)
) +
xlab(bquote(log[2](.("rC5a") / .("control")))) +
ylab(expression(-log[10]("Adjusted p")))
print(p)
ggsave(
filename = "C5a_vs_No_tx_volcano.png",
plot = p,
width = 9,
height = 7,
dpi = 300,
units = "in"
)
#ggsave("volcano_c5_notx.pdf", width = 9, height = 7, dpi = 300)