Differential expression analysis of Hypothalamus astrocytes dataset from Romanov RA et al 2020

Author

Evgenii O. Tretiakov

Published

July 7, 2023

Load data and setup parameters

Code

# Load tidyverse infrastructure packages
suppressPackageStartupMessages({
  library(future)
  library(here)
  library(tidyverse)
  library(magrittr)
  library(stringr)
  library(skimr)
  library(RColorBrewer)
  library(viridis)
})


# Load packages for scRNA-seq analysis and visualisation
suppressPackageStartupMessages({
  library(ggplot2)
  library(cowplot)
  library(patchwork)
  library(ggstatsplot)
  library(anndata)
  library(sceasy)
  library(Seurat)
  library(SeuratDisk)
  library(SeuratWrappers)
  library(scCustomize)
})

sc <- import("scanpy", convert = FALSE)

Set paths

Code

src_dir <- here("code")
data_dir <- here("../data")
output_dir <- here("output")
plots_dir <- here(output_dir, "figures")
tables_dir <- here(output_dir, "tables")

Load helper functions and gene-sets

Code

source(here(src_dir, "genes.R"))
source(here(src_dir, "functions.R"))

Set fixed variables

Code

# set seed
reseed <- 42
set.seed(seed = reseed)

# Parameters for parallel execution
n_cores <- 32
plan("multisession", workers = n_cores)
options(
  future.globals.maxSize = 100000 * 1024^2,
  future.rng.onMisuse = "ignore"
)
plan()

multisession:
- args: function (..., workers = 32, envir = parent.frame())
- tweaked: TRUE
- call: plan("multisession", workers = n_cores)

Code

# ggplot2 theme
theme_set(ggmin::theme_powerpoint())

Code

bioproject <- "PRJNA548917"
project <- "romanov2020_Hypoth-dev"
cb_fpr <- 0.001
low_cutoff_gene <- 500
high_cutoff_gene <- NULL
high_cutoff_gene <- 6000
low_cutoff_umis <- NULL
low_cutoff_umis <- -Inf
high_cutoff_umis <- 20000
high_cutoff_pc_mt <- 15
high_cutoff_pc_ribo <- 15
high_cutoff_pc_hb <- 0.1
high_cutoff_doublet_score <- 0.33
high_cutoff_complexity <- 0.85
connectivity_model <- "min_tree"
k <- 10
metric <- "euclidean"
signature <- 100

Load predicted astrocytes data and subset from Romanov RA et al 2020

Code

anndata <- sc$read(here(
  data_dir,
  sprintf("resolved_subregions_by_microclusters/best_xgboost-subregional_%s-astrocytes_dataset-msp_%s-metric_%s-k_%s-sign_%s-amb_%s.h5ad", bioproject, connectivity_model, metric, k, signature, cb_fpr)
))

Convert adata object to R AnnDataR6 object.

Code

adata <- py_to_r(anndata)
class(adata)

[1] "AnnDataR6" "R6"

Code

class(adata$X)

[1] "matrix" "array"

Code

adata

AnnData object with n_obs × n_vars = 3042 × 21989
    obs: 'nCount_RAW', 'nFeature_RAW', 'nCount_RNA', 'nFeature_RNA', 'orig.ident', 'nFeature_Diff', 'nCount_Diff', 'percent_mito', 'percent_ribo', 'percent_mito_ribo', 'percent_hb', 'log10GenesPerUMI', 'cell_name', 'barcode', 'latent_RT_efficiency', 'latent_cell_probability', 'latent_scale', 'doublet_score', 'predicted_doublets', 'QC', 'var_regex', 'RNA_snn_res.0.5', 'RNA_snn_res.0.730162924108181', 'RNA_snn_res.1.22140339759076', 'RNA_snn_res.3.000001', 'seurat_clusters', 'k_tree', 'comb_clstr1', 'S.Score', 'G2M.Score', 'Phase', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.1', 'SCT_snn_res.1.10049753133357', 'SCT_snn_res.1.22022353686838', 'SCT_snn_res.1.36528036285893', 'SCT_snn_res.1.54465801813792', 'SCT_snn_res.1.77217142500043', 'SCT_snn_res.2.0701989287267', 'SCT_snn_res.2.47753434006845', 'SCT_snn_res.3.06781889081486', 'SCT_snn_res.4.00000100000001', 'bioproject', 'project', 'model', 'tech', 'region', 'sex', 'stage', 'libname', 'expbtch', 'condit', 'ora_celltype', 'train', 'test', 'Regulon(Ahr)', 'Regulon(Atf3)', 'Regulon(Brca1)', 'Regulon(Dlx5)', 'Regulon(E2f1)', 'Regulon(E2f2)', 'Regulon(E2f7)', 'Regulon(Ebf1)', 'Regulon(Egr2)', 'Regulon(Elf4)', 'Regulon(Fli1)', 'Regulon(Fos)', 'Regulon(Fosb)', 'Regulon(Fosl1)', 'Regulon(Foxo1)', 'Regulon(Gata1)', 'Regulon(Gata2)', 'Regulon(Gata3)', 'Regulon(Gata4)', 'Regulon(Hes7)', 'Regulon(Hnf4g)', 'Regulon(Ikzf1)', 'Regulon(Jun)', 'Regulon(Junb)', 'Regulon(Jund)', 'Regulon(Klf2)', 'Regulon(Mafk)', 'Regulon(Mef2c)', 'Regulon(Mxi1)', 'Regulon(Nfe2l3)', 'Regulon(Nfil3)', 'Regulon(Nfya)', 'Regulon(Nkx6-2)', 'Regulon(Nr1d1)', 'Regulon(Pax5)', 'Regulon(Prdm1)', 'Regulon(Rarg)', 'Regulon(Rfx4)', 'Regulon(Runx1)', 'Regulon(Runx2)', 'Regulon(Sox9)', 'Regulon(Sp2)', 'Regulon(Sp5)', 'Regulon(Stat3)', 'Regulon(Tal1)', 'Regulon(Tbx19)', 'Regulon(Tbx21)', 'Regulon(Tcf4)', 'Regulon(Tead1)', 'Regulon(Zfp109)', 'Regulon(Zfp319)', 'Regulon(Zic3)', 'predict_logit_subregion', 'predict_xgboost_subregion'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'Regulon(Ahr)', 'Regulon(Atf3)', 'Regulon(Brca1)', 'Regulon(Dlx5)', 'Regulon(E2f1)', 'Regulon(E2f2)', 'Regulon(E2f7)', 'Regulon(Ebf1)', 'Regulon(Egr2)', 'Regulon(Elf4)', 'Regulon(Fli1)', 'Regulon(Fos)', 'Regulon(Fosb)', 'Regulon(Fosl1)', 'Regulon(Foxo1)', 'Regulon(Gata1)', 'Regulon(Gata2)', 'Regulon(Gata3)', 'Regulon(Gata4)', 'Regulon(Hes7)', 'Regulon(Hnf4g)', 'Regulon(Ikzf1)', 'Regulon(Jun)', 'Regulon(Junb)', 'Regulon(Jund)', 'Regulon(Klf2)', 'Regulon(Mafk)', 'Regulon(Mef2c)', 'Regulon(Mxi1)', 'Regulon(Nfe2l3)', 'Regulon(Nfil3)', 'Regulon(Nfya)', 'Regulon(Nkx6-2)', 'Regulon(Nr1d1)', 'Regulon(Pax5)', 'Regulon(Prdm1)', 'Regulon(Rarg)', 'Regulon(Rfx4)', 'Regulon(Runx1)', 'Regulon(Runx2)', 'Regulon(Sox9)', 'Regulon(Sp2)', 'Regulon(Sp5)', 'Regulon(Stat3)', 'Regulon(Tal1)', 'Regulon(Tbx19)', 'Regulon(Tbx21)', 'Regulon(Tcf4)', 'Regulon(Tead1)', 'Regulon(Zfp109)', 'Regulon(Zfp319)', 'Regulon(Zic3)'
    uns: 'aucell', 'k_tree_colors', 'log1p', 'name', 'ora_celltype_colors', 'predict_logit_subregion_colors'
    obsm: 'X_aucell', 'X_pacmap', 'X_pca', 'X_umap', 'X_umap_logit', 'X_umap_subfunct', 'ora_estimate', 'ora_pvals'

Code

srt_path <- here(
  data_dir,
  sprintf("resolved_subregions_by_microclusters/best_xgboost-subregional_%s-astrocytes_dataset-msp_%s-metric_%s-k_%s-sign_%s-amb_%s.h5Seurat", bioproject, connectivity_model, metric, k, signature, cb_fpr)
)

regsrt_path <- here(
  data_dir,
  sprintf("resolved_subregions_by_microclusters/%s_regulons-DER-astrocytes_dataset-msp_%s-metric_%s-k_%s-sign_%s-amb_%s.h5Seurat", bioproject, connectivity_model, metric, k, signature, cb_fpr)
)

expr_mtx <- t(as.matrix(adata$raw$X))
colnames(expr_mtx) <- rownames(adata$X)
rownames(expr_mtx) <- adata$var_names
srt <- CreateSeuratObject(
  expr_mtx,
  assay = "RNA",
  project = "romanov2020_Hypoth_dev",
  meta.data = as.data.frame(adata$obs),
  row.names = colnames(adata$X)
)

regmtx <- t(adata$obsm$X_aucell)
colnames(regmtx) <- colnames(srt)
rownames(regmtx) <- adata$obs_keys() %>% .[str_detect(string = ., pattern = "^Regulon")]

regsrt <- CreateSeuratObject(
  regmtx,
  assay = "Regulons",
  project = "romanov2020_Hypoth_dev",
  meta.data = as.data.frame(adata$obs)
)

densmap_logit <- adata$obsm$X_umap_logit
colnames(densmap_logit) <- c("densMAP_logit_1", "densMAP_logit_2")
srt[["densmap_logit"]] <- CreateDimReducObject(embeddings = densmap_logit, key = "densMAP_logit_", assay = DefaultAssay(srt))
# srt <- ProjectDim(srt, reduction = "densmap_logit")
regsrt[["densmap_logit"]] <- CreateDimReducObject(embeddings = densmap_logit, key = "densMAP_logit_", assay = DefaultAssay(regsrt))
# regsrt <- ProjectDim(regsrt, reduction = "densmap_logit")

densmap_xgboost <- adata$obsm$X_umap
colnames(densmap_xgboost) <- c("densMAP_xgboost_1", "densMAP_xgboost_2")
srt[["densmap_xgboost"]] <- CreateDimReducObject(embeddings = densmap_xgboost, key = "densMAP_xgboost_", assay = DefaultAssay(srt))
# srt <- ProjectDim(srt, reduction = "densmap_xgboost")
regsrt[["densmap_xgboost"]] <- CreateDimReducObject(embeddings = densmap_xgboost, key = "densMAP_xgboost_", assay = DefaultAssay(regsrt))
# regsrt <- ProjectDim(regsrt, reduction = "densmap_xgboost")

Idents(srt) <- "predict_xgboost_subregion"
Idents(regsrt) <- "predict_xgboost_subregion"
srt <- Store_Palette_Seurat(seurat_object = srt, palette = rev(brewer.pal(n = 11, name = "Spectral")), palette_name = "expr_Colour_Pal")
regsrt <- Store_Palette_Seurat(seurat_object = regsrt, palette = rev(brewer.pal(n = 11, name = "PiYG")), palette_name = "expr_Colour_Pal")
SaveH5Seurat(srt, filename = srt_path, overwrite = TRUE)
SaveH5Seurat(regsrt, filename = regsrt_path, overwrite = TRUE)

Code

srt <- LoadH5Seurat(file = srt_path)
regsrt <- LoadH5Seurat(file = regsrt_path)
Idents(srt) <- "predict_xgboost_subregion"
Idents(regsrt) <- "predict_xgboost_subregion"
print(srt)

An object of class Seurat 
21989 features across 3042 samples within 1 assay 
Active assay: RNA (21989 features, 0 variable features)
 2 dimensional reductions calculated: densmap_logit, densmap_xgboost

Code

print(regsrt)

An object of class Seurat 
52 features across 3042 samples within 1 assay 
Active assay: Regulons (52 features, 0 variable features)
 2 dimensional reductions calculated: densmap_logit, densmap_xgboost

Code

srt <- NormalizeData(srt)
srt <- FindVariableFeatures(srt, selection.method = "vst", nfeatures = 3000)
all.genes <- rownames(srt)
srt <- ScaleData(srt, features = all.genes)

all_markers_genes <- FindAllMarkers(object = srt, verbose = F, test.use = "MAST", only.pos = T, min.pct = 0.05, logfc.threshold = 0.15) %>%
  Add_Pct_Diff()
write_csv(all_markers_genes, here(data_dir, sprintf("resolved_subregions_by_microclusters/%s-all-marker-genes.csv", bioproject)))

all_markers_genes <- all_markers_genes %>%
  filter(pct_diff > 0.1) %>%
  group_by(cluster) %>%
  arrange(desc(avg_log2FC)) %>%
  arrange(cluster)
all_markers_genes2 <- all_markers_genes %>%
  filter(gene %in% c(npr, nmr, transcription_factors)) %>%
  group_by(cluster) %>%
  arrange(desc(avg_log2FC)) %>%
  arrange(cluster)

top_5 <- Extract_Top_Markers(marker_dataframe = all_markers_genes, num_genes = 5, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC", make_unique = TRUE, named_vector = FALSE)
top_30 <- Extract_Top_Markers(marker_dataframe = all_markers_genes, num_genes = 30, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC")
top_5s <- Extract_Top_Markers(marker_dataframe = all_markers_genes2, num_genes = 5, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC", make_unique = TRUE, named_vector = FALSE)
top_30s <- Extract_Top_Markers(marker_dataframe = all_markers_genes2, num_genes = 30, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC")

Code

Iterate_FeaturePlot_scCustom(reduction = "densmap_xgboost",
  na_cutoff = NA,
  pt.size = 4,
  order = TRUE,
  alpha_na_exp = 0.1,
  alpha_exp = 0.75, seurat_object = srt, gene_list = top_30, single_pdf = FALSE, file_path = plots_dir, file_type=".pdf", file_name = sprintf("%s.pdf", bioproject), colors_use = srt@misc$expr_Colour_Pal
)

Code

Clustered_DotPlot(seurat_object = srt, colors_use_exp = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = top_5, k = 9)

[[1]]


[[2]]

Code

Clustered_DotPlot(seurat_object = srt, colors_use_exp = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = top_5s, k = 9)

[[1]]


[[2]]

Code

DotPlot_scCustom(seurat_object = srt, colors_use = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = npr[npr %in% rownames(srt)], flip_axes = T, x_lab_rotate = TRUE)

Code

DotPlot_scCustom(seurat_object = srt, colors_use = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = nmr[nmr %in% rownames(srt)], flip_axes = T, x_lab_rotate = TRUE)

Code

DotPlot_scCustom(seurat_object = srt, colors_use = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = genes.embed[genes.embed %in% rownames(srt)], flip_axes = T, x_lab_rotate = TRUE)

Code

DotPlot_scCustom(seurat_object = srt, colors_use = viridis(n = 30, alpha = .75, direction = -1, option = "E"), features = mitochondrial[mitochondrial %in% rownames(srt)], flip_axes = T, x_lab_rotate = TRUE)

Code

de_01 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "1",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_02 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "2",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_03 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "3",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_04 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "4",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_05 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "5",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_06 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_07 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "0", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_12 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "2",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_13 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "3",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_14 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "4",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_15 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "5",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_16 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_17 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "1", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_23 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "2", ident.2 = "3",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_24 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "2", ident.2 = "4",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_25 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "2", ident.2 = "5",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_26 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "2", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_27 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "2", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_34 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "3", ident.2 = "4",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_35 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "3", ident.2 = "5",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_36 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "3", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_37 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "3", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_45 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "4", ident.2 = "5",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_46 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "4", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_47 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "4", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_56 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "5", ident.2 = "6",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_57 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "5", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")

de_67 <- FindMarkers(srt,
  assay = "RNA", ident.1 = "6", ident.2 = "7",
  test.use = "LR",
  only.pos = F,
  min.pct = 0.01
) %>% rownames_to_column("gene")


top_genes <- list()

combinations <- list(
  contrast01 = de_01, contrast02 = de_02, contrast03 = de_03,
  contrast04 = de_04, contrast05 = de_05, contrast06 = de_06,
  contrast07 = de_07, contrast12 = de_12, contrast13 = de_13,
  contrast14 = de_14, contrast15 = de_15, contrast16 = de_16,
  contrast17 = de_17, contrast23 = de_23, contrast24 = de_24,
  contrast25 = de_25, contrast26 = de_26, contrast27 = de_27,
  contrast34 = de_34, contrast35 = de_35, contrast36 = de_36,
  contrast37 = de_37, contrast45 = de_45, contrast46 = de_46,
  contrast47 = de_47, contrast56 = de_56, contrast57 = de_57,
  contrast67 = de_67
) %>% compact()

# Iterate through the 'combinations' list
for (contrast_name in names(combinations)) {
  # Create a unique file name for each data frame
  file_name <- paste0(contrast_name, "_result.csv")

  # Save the data frame as a CSV file
  write_csv(combinations[[contrast_name]], here(tables_dir, file_name))

  # Print a message to indicate progress
  print(paste0("Saved: ", file_name))
}

[1] "Saved: contrast01_result.csv"
[1] "Saved: contrast02_result.csv"
[1] "Saved: contrast03_result.csv"
[1] "Saved: contrast04_result.csv"
[1] "Saved: contrast05_result.csv"
[1] "Saved: contrast06_result.csv"
[1] "Saved: contrast07_result.csv"
[1] "Saved: contrast12_result.csv"
[1] "Saved: contrast13_result.csv"
[1] "Saved: contrast14_result.csv"
[1] "Saved: contrast15_result.csv"
[1] "Saved: contrast16_result.csv"
[1] "Saved: contrast17_result.csv"
[1] "Saved: contrast23_result.csv"
[1] "Saved: contrast24_result.csv"
[1] "Saved: contrast25_result.csv"
[1] "Saved: contrast26_result.csv"
[1] "Saved: contrast27_result.csv"
[1] "Saved: contrast34_result.csv"
[1] "Saved: contrast35_result.csv"
[1] "Saved: contrast36_result.csv"
[1] "Saved: contrast37_result.csv"
[1] "Saved: contrast45_result.csv"
[1] "Saved: contrast46_result.csv"
[1] "Saved: contrast47_result.csv"
[1] "Saved: contrast56_result.csv"
[1] "Saved: contrast57_result.csv"
[1] "Saved: contrast67_result.csv"

Code

for (contrast in names(combinations)) {
  top_upregulated <- combinations[[contrast]] %>%
    filter(p_val_adj < 0.05, avg_log2FC > 0, gene %in% gene_int) %>%
    arrange(desc(avg_log2FC)) %>%
    slice_head(n = 5)
  top_downregulated <- combinations[[contrast]] %>%
    filter(p_val_adj < 0.05, avg_log2FC < 0, gene %in% gene_int) %>%
    arrange(avg_log2FC) %>%
    slice_head(n = 5)

  top_genes[[contrast]] <- c(top_upregulated$gene, top_downregulated$gene)
}

Code

DefaultAssay(srt) <- "RNA"
cluster.averages <- AverageExpression(srt, return.seurat = TRUE)
DefaultAssay(cluster.averages) <- "RNA"

cluster.averages

An object of class Seurat 
21989 features across 8 samples within 1 assay 
Active assay: RNA (21989 features, 0 variable features)

Code

plot_pairs <- function(highlight_genes, pair_name) {
  i <- pair_name %>% str_sub(9, 9)
  j <- pair_name %>% str_sub(10, 10)
  print(paste0("Comparison: ", i, " vs ", j))
  plot <- CellScatter(cluster.averages, cell1 = i, cell2 = j, features = gene_int[gene_int %in% rownames(cluster.averages)], highlight = highlight_genes)
  return(plot)
}
plots <- top_genes %>% imap(plot_pairs)

[1] "Comparison: 0 vs 1"
[1] "Comparison: 0 vs 2"
[1] "Comparison: 0 vs 3"
[1] "Comparison: 0 vs 4"
[1] "Comparison: 0 vs 5"
[1] "Comparison: 0 vs 6"
[1] "Comparison: 0 vs 7"
[1] "Comparison: 1 vs 2"
[1] "Comparison: 1 vs 3"
[1] "Comparison: 1 vs 4"
[1] "Comparison: 1 vs 5"
[1] "Comparison: 1 vs 6"
[1] "Comparison: 1 vs 7"
[1] "Comparison: 2 vs 3"
[1] "Comparison: 2 vs 4"
[1] "Comparison: 2 vs 5"
[1] "Comparison: 2 vs 6"
[1] "Comparison: 2 vs 7"
[1] "Comparison: 3 vs 4"
[1] "Comparison: 3 vs 5"
[1] "Comparison: 3 vs 6"
[1] "Comparison: 3 vs 7"
[1] "Comparison: 4 vs 5"
[1] "Comparison: 4 vs 6"
[1] "Comparison: 4 vs 7"
[1] "Comparison: 5 vs 6"
[1] "Comparison: 5 vs 7"
[1] "Comparison: 6 vs 7"

Code

plt_grid <- cowplot::plot_grid(plotlist = plots, ncol = 7, labels = names(top_genes) %>% map_chr(str_sub, 9, 10), label_size = 8)
plt_grid

Code

save_plot(here(plots_dir, "de-pairs-plot.pdf"), plt_grid, base_width = 6, base_height = 7, base_asp = 1, ncol = 7, limitsize = FALSE)

Code

# regsrt <- NormalizeData(regsrt)
# regsrt <- FindVariableFeatures(regsrt, selection.method = "vst")
# all.genes <- rownames(regsrt)
# regsrt <- ScaleData(regsrt, features = all.genes)
all_regulon_markers_regulons <- FindAllMarkers(object = regsrt, verbose = F, test.use = "LR", assay = "Regulons", only.pos = T, min.pct = 0.05, logfc.threshold = 0.1)
write_csv(all_regulon_markers_regulons, here(data_dir, sprintf("resolved_subregions_by_microclusters/%s-all-marker-regulons.csv", bioproject)))

if (dim(all_regulon_markers_regulons)[1] > 0) {
  all_regulon_markers_regulons <- all_regulon_markers_regulons %>%
    group_by(cluster) %>%
    arrange(desc(avg_log2FC)) %>%
    arrange(cluster)

  top_5r <- Extract_Top_Markers(marker_dataframe = all_regulon_markers_regulons, num_genes = 5, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC", make_unique = TRUE, named_vector = FALSE)
  top_15r <- Extract_Top_Markers(marker_dataframe = all_regulon_markers_regulons, num_genes = 15, group_by = "cluster", gene_column = "gene", rank_by = "avg_log2FC")
}

Code

Iterate_FeaturePlot_scCustom(reduction = "densmap_xgboost",
    na_cutoff = NA,
    pt.size = 4,
    order = TRUE,
    alpha_na_exp = 0.1,
    alpha_exp = 0.75, seurat_object = regsrt, gene_list = rownames(regmtx), single_pdf = FALSE, file_path = plots_dir, file_type=".pdf", file_name = sprintf("%s-regulon.pdf", bioproject), colors_use = regsrt@misc$expr_Colour_Pal
)

Code

Clustered_DotPlot(seurat_object = regsrt, colors_use_exp = viridis(n = 30, alpha = .75, direction = -1, option = "G"), features = rownames(regmtx), k = 14)

[[1]]


[[2]]

Session information

Code

sI <- sessioninfo::session_info()
sI$loadedOnly <- NULL
print(sI, locale = FALSE)

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.2 (2022-10-31)
 os       Ubuntu 22.04.2 LTS
 system   x86_64, linux-gnu
 ui       X11
 language en_US:en
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Etc/UTC
 date     2023-07-07
 pandoc   2.19.2 @ /opt/python/3.8.8/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package              * version     date (UTC) lib source
 abind                  1.4-5       2016-07-21 [1] RSPM (R 4.2.0)
 anndata              * 0.7.5.6     2023-03-17 [1] RSPM (R 4.2.0)
 assertthat             0.2.1       2019-03-21 [1] RSPM (R 4.2.0)
 base64enc              0.1-3       2015-07-28 [1] RSPM (R 4.2.0)
 bayestestR             0.13.1      2023-04-30 [1] https://easystats.r-universe.dev (R 4.2.2)
 beeswarm               0.4.0       2021-06-01 [1] RSPM (R 4.2.0)
 Biobase                2.58.0      2022-11-01 [1] RSPM (R 4.2.2)
 BiocGenerics           0.44.0      2022-11-01 [1] RSPM (R 4.2.2)
 BiocManager            1.30.20     2023-02-24 [1] RSPM (R 4.2.0)
 bit                    4.0.5       2022-11-15 [1] RSPM (R 4.2.0)
 bit64                  4.0.5       2020-08-30 [1] RSPM (R 4.2.0)
 bitops                 1.0-7       2021-04-24 [1] RSPM (R 4.2.0)
 Cairo                  1.6-0       2022-07-05 [1] RSPM (R 4.2.2)
 circlize               0.4.15      2022-05-10 [1] RSPM (R 4.2.0)
 cli                    3.6.1       2023-03-23 [1] RSPM (R 4.2.0)
 clue                   0.3-64      2023-01-31 [1] RSPM (R 4.2.0)
 cluster                2.1.4       2022-08-22 [1] RSPM (R 4.2.0)
 coda                   0.19-4      2020-09-30 [1] RSPM (R 4.2.0)
 codetools              0.2-19      2023-02-01 [1] RSPM (R 4.2.0)
 colorspace             2.1-0       2023-01-23 [1] RSPM (R 4.2.0)
 ComplexHeatmap         2.14.0      2022-11-01 [1] RSPM (R 4.2.2)
 correlation            0.8.4       2023-04-30 [1] Github (easystats/correlation@3d5cd1e)
 cowplot              * 1.1.1       2020-12-30 [1] RSPM (R 4.2.0)
 crayon                 1.5.2       2022-09-29 [1] RSPM (R 4.2.0)
 data.table             1.14.8      2023-02-17 [1] RSPM (R 4.2.0)
 datawizard             0.7.1       2023-04-03 [1] RSPM (R 4.2.0)
 DelayedArray           0.24.0      2022-11-01 [1] RSPM (R 4.2.2)
 deldir                 1.0-6       2021-10-23 [1] RSPM (R 4.2.0)
 digest                 0.6.31      2022-12-11 [1] RSPM (R 4.2.0)
 doParallel             1.0.17      2022-02-07 [1] RSPM (R 4.2.0)
 dplyr                * 1.1.2       2023-04-20 [1] RSPM (R 4.2.2)
 ellipsis               0.3.2       2021-04-29 [1] RSPM (R 4.2.0)
 emmeans                1.8.5       2023-03-08 [1] RSPM (R 4.2.0)
 estimability           1.4.1       2022-08-05 [1] RSPM (R 4.2.0)
 evaluate               0.20        2023-01-17 [1] RSPM (R 4.2.0)
 fansi                  1.0.4       2023-01-22 [1] RSPM (R 4.2.0)
 farver                 2.1.1       2022-07-06 [1] RSPM (R 4.2.0)
 fastmap                1.1.1       2023-02-24 [1] RSPM (R 4.2.0)
 fitdistrplus           1.1-11      2023-04-25 [1] RSPM (R 4.2.2)
 forcats              * 1.0.0       2023-01-29 [1] RSPM (R 4.2.0)
 foreach                1.5.2       2022-02-02 [1] RSPM (R 4.2.0)
 future               * 1.32.0      2023-03-07 [1] RSPM (R 4.2.0)
 future.apply           1.10.0      2022-11-05 [1] RSPM (R 4.2.0)
 generics               0.1.3       2022-07-05 [1] RSPM (R 4.2.0)
 GenomeInfoDb           1.34.9      2023-02-02 [1] RSPM (R 4.2.2)
 GenomeInfoDbData       1.2.9       2023-04-30 [1] RSPM (R 4.2.2)
 GenomicRanges          1.50.2      2022-12-16 [1] RSPM (R 4.2.2)
 GetoptLong             1.0.5       2020-12-15 [1] RSPM (R 4.2.0)
 ggbeeswarm             0.7.2       2023-04-30 [1] Github (eclarke/ggbeeswarm@3cf58a9)
 ggmin                  0.0.0.9000  2023-04-30 [1] Github (sjessa/ggmin@8ada274)
 ggplot2              * 3.4.2       2023-04-03 [1] RSPM (R 4.2.0)
 ggprism                1.0.4       2023-04-30 [1] Github (csdaw/ggprism@0e411f4)
 ggrastr                1.0.1       2023-04-30 [1] Github (VPetukhov/ggrastr@7aed9af)
 ggrepel                0.9.2.9999  2023-04-30 [1] Github (slowkow/ggrepel@fe3b5c3)
 ggridges               0.5.4       2022-09-26 [1] RSPM (R 4.2.0)
 ggstatsplot          * 0.11.1.9000 2023-04-30 [1] Github (IndrajeetPatil/ggstatsplot@befe812)
 GlobalOptions          0.1.2       2020-06-10 [1] RSPM (R 4.2.0)
 globals                0.16.2      2022-11-21 [1] RSPM (R 4.2.0)
 glue                   1.6.2       2022-02-24 [1] RSPM (R 4.2.0)
 goftest                1.2-3       2021-10-07 [1] RSPM (R 4.2.0)
 gridExtra              2.3         2017-09-09 [1] RSPM (R 4.2.0)
 gtable                 0.3.3       2023-03-21 [1] RSPM (R 4.2.0)
 hdf5r                  1.3.8       2023-01-21 [1] RSPM (R 4.2.2)
 here                 * 1.0.1       2020-12-13 [1] RSPM (R 4.2.0)
 hms                    1.1.3       2023-03-21 [1] RSPM (R 4.2.0)
 htmltools              0.5.5       2023-03-23 [1] RSPM (R 4.2.0)
 htmlwidgets            1.6.2       2023-03-17 [1] RSPM (R 4.2.0)
 httpuv                 1.6.9       2023-02-14 [1] RSPM (R 4.2.0)
 httr                   1.4.5       2023-02-24 [1] RSPM (R 4.2.0)
 ica                    1.0-3       2022-07-08 [1] RSPM (R 4.2.0)
 igraph                 1.4.1       2023-02-24 [1] RSPM (R 4.2.0)
 insight                0.19.1      2023-03-18 [1] RSPM (R 4.2.0)
 IRanges                2.32.0      2022-11-01 [1] RSPM (R 4.2.2)
 irlba                  2.3.5.1     2022-10-03 [1] RSPM (R 4.2.0)
 iterators              1.0.14      2022-02-05 [1] RSPM (R 4.2.0)
 janitor                2.2.0.9000  2023-04-30 [1] Github (sfirke/janitor@d64c8bb)
 jsonlite               1.8.4       2022-12-06 [1] RSPM (R 4.2.0)
 KernSmooth             2.23-20     2021-05-03 [1] RSPM (R 4.2.0)
 knitr                  1.42        2023-01-25 [1] RSPM (R 4.2.0)
 labeling               0.4.2       2020-10-20 [1] RSPM (R 4.2.0)
 later                  1.3.0       2021-08-18 [1] RSPM (R 4.2.0)
 lattice                0.21-8      2023-04-05 [1] RSPM (R 4.2.0)
 lazyeval               0.2.2       2019-03-15 [1] RSPM (R 4.2.0)
 leiden                 0.4.3       2022-09-10 [1] RSPM (R 4.2.0)
 lifecycle              1.0.3       2022-10-07 [1] RSPM (R 4.2.0)
 listenv                0.9.0       2022-12-16 [1] RSPM (R 4.2.0)
 lmtest                 0.9-40      2022-03-21 [1] RSPM (R 4.2.0)
 lubridate            * 1.9.2       2023-02-10 [1] RSPM (R 4.2.0)
 magick                 2.7.4       2023-03-09 [1] RSPM (R 4.2.0)
 magrittr             * 2.0.3       2022-03-30 [1] RSPM (R 4.2.0)
 MASS                   7.3-58.1    2022-08-03 [1] CRAN (R 4.2.2)
 MAST                   1.24.1      2023-01-23 [1] RSPM (R 4.2.2)
 Matrix                 1.5-4       2023-04-04 [1] RSPM (R 4.2.0)
 MatrixGenerics         1.10.0      2022-11-01 [1] RSPM (R 4.2.2)
 matrixStats            0.63.0      2022-11-18 [1] RSPM (R 4.2.0)
 mime                   0.12        2021-09-28 [1] RSPM (R 4.2.0)
 miniUI                 0.1.1.1     2018-05-18 [1] RSPM (R 4.2.0)
 multcomp               1.4-23      2023-03-09 [1] RSPM (R 4.2.0)
 munsell                0.5.0       2018-06-12 [1] RSPM (R 4.2.0)
 mvtnorm                1.1-3       2021-10-08 [1] RSPM (R 4.2.0)
 nlme                   3.1-162     2023-01-31 [1] RSPM (R 4.2.0)
 paletteer              1.5.0       2022-10-19 [1] RSPM (R 4.2.0)
 parallelly             1.35.0      2023-03-23 [1] RSPM (R 4.2.0)
 parameters             0.20.3      2023-04-05 [1] RSPM (R 4.2.0)
 patchwork            * 1.1.2.9000  2023-04-30 [1] Github (thomasp85/patchwork@c14c960)
 pbapply                1.7-0       2023-01-13 [1] RSPM (R 4.2.0)
 pillar                 1.9.0       2023-03-22 [1] RSPM (R 4.2.0)
 pkgconfig              2.0.3       2019-09-22 [1] RSPM (R 4.2.0)
 plotly                 4.10.1      2022-11-07 [1] RSPM (R 4.2.0)
 plyr                   1.8.8       2022-11-11 [1] RSPM (R 4.2.0)
 png                    0.1-8       2022-11-29 [1] RSPM (R 4.2.0)
 polyclip               1.10-4      2022-10-20 [1] RSPM (R 4.2.0)
 prettyunits            1.1.1       2020-01-24 [1] RSPM (R 4.2.0)
 prismatic              1.1.1       2022-08-15 [1] RSPM (R 4.2.0)
 progress               1.2.2       2019-05-16 [1] RSPM (R 4.2.0)
 progressr              0.13.0      2023-01-10 [1] RSPM (R 4.2.0)
 promises               1.2.0.1     2021-02-11 [1] RSPM (R 4.2.0)
 purrr                * 1.0.1       2023-01-10 [1] RSPM (R 4.2.0)
 R.methodsS3            1.8.2       2022-06-13 [1] RSPM (R 4.2.0)
 R.oo                   1.25.0      2022-06-12 [1] RSPM (R 4.2.0)
 R.utils                2.12.2      2022-11-11 [1] RSPM (R 4.2.0)
 R6                     2.5.1       2021-08-19 [1] RSPM (R 4.2.0)
 ragg                   1.2.5       2023-01-12 [1] RSPM (R 4.2.0)
 RANN                   2.6.1       2019-01-08 [1] RSPM (R 4.2.0)
 RColorBrewer         * 1.1-3       2022-04-03 [1] RSPM (R 4.2.0)
 Rcpp                   1.0.10      2023-01-22 [1] RSPM (R 4.2.0)
 RcppAnnoy              0.0.20      2022-10-27 [1] RSPM (R 4.2.0)
 RCurl                  1.98-1.12   2023-03-27 [1] RSPM (R 4.2.0)
 readr                * 2.1.4       2023-02-10 [1] RSPM (R 4.2.0)
 rematch2               2.1.2       2020-05-01 [1] RSPM (R 4.2.0)
 remotes                2.4.2       2021-11-30 [1] RSPM (R 4.2.0)
 repr                   1.1.6       2023-01-26 [1] RSPM (R 4.2.0)
 reshape2               1.4.4       2020-04-09 [1] RSPM (R 4.2.0)
 reticulate           * 1.28-9000   2023-04-30 [1] Github (rstudio/reticulate@442c49f)
 rjson                  0.2.21      2022-01-09 [1] RSPM (R 4.2.0)
 rlang                  1.1.0       2023-03-14 [1] RSPM (R 4.2.0)
 rmarkdown              2.21        2023-03-26 [1] RSPM (R 4.2.0)
 ROCR                   1.0-11      2020-05-02 [1] RSPM (R 4.2.0)
 rprojroot              2.0.3       2022-04-02 [1] RSPM (R 4.2.0)
 rsvd                   1.0.5       2021-04-16 [1] RSPM (R 4.2.0)
 Rtsne                  0.16        2022-04-17 [1] RSPM (R 4.2.0)
 S4Vectors              0.36.2      2023-02-26 [1] RSPM (R 4.2.2)
 sandwich               3.0-2       2022-06-15 [1] RSPM (R 4.2.0)
 scales                 1.2.1       2022-08-20 [1] RSPM (R 4.2.0)
 scattermore            0.8         2022-02-14 [1] RSPM (R 4.2.0)
 scCustomize          * 1.1.1       2023-04-30 [1] Github (samuel-marsh/scCustomize@d08268d)
 sceasy               * 0.0.7       2023-04-30 [1] Github (cellgeni/sceasy@0cfc0e3)
 sctransform            0.3.5       2022-09-21 [1] RSPM (R 4.2.0)
 sessioninfo            1.2.2       2021-12-06 [1] RSPM (R 4.2.0)
 Seurat               * 4.3.0       2022-11-18 [1] RSPM (R 4.2.2)
 SeuratDisk           * 0.0.0.9020  2023-04-30 [1] Github (mojaveazure/seurat-disk@9b89970)
 SeuratObject         * 4.1.3       2022-11-07 [1] RSPM (R 4.2.0)
 SeuratWrappers       * 0.3.1       2023-04-30 [1] Github (satijalab/seurat-wrappers@d28512f)
 shape                  1.4.6       2021-05-19 [1] RSPM (R 4.2.0)
 shiny                  1.7.4       2022-12-15 [1] RSPM (R 4.2.0)
 SingleCellExperiment   1.20.1      2023-03-17 [1] RSPM (R 4.2.2)
 skimr                * 2.1.5       2023-04-30 [1] Github (ropensci/skimr@d5126aa)
 snakecase              0.11.0      2019-05-25 [1] RSPM (R 4.2.0)
 sp                     1.6-0       2023-01-19 [1] RSPM (R 4.2.0)
 spatstat.data          3.0-1       2023-03-12 [1] RSPM (R 4.2.0)
 spatstat.explore       3.1-0       2023-03-14 [1] RSPM (R 4.2.0)
 spatstat.geom          3.1-0       2023-03-12 [1] RSPM (R 4.2.0)
 spatstat.random        3.1-4       2023-03-13 [1] RSPM (R 4.2.0)
 spatstat.sparse        3.0-1       2023-03-12 [1] RSPM (R 4.2.0)
 spatstat.utils         3.0-2       2023-03-11 [1] RSPM (R 4.2.0)
 statsExpressions       1.5.0       2023-02-19 [1] RSPM (R 4.2.0)
 stringi                1.7.12      2023-01-11 [1] RSPM (R 4.2.0)
 stringr              * 1.5.0       2022-12-02 [1] RSPM (R 4.2.0)
 SummarizedExperiment   1.28.0      2022-11-01 [1] RSPM (R 4.2.2)
 survival               3.5-5       2023-03-12 [1] RSPM (R 4.2.0)
 systemfonts            1.0.4       2022-02-11 [1] RSPM (R 4.2.0)
 tensor                 1.5         2012-05-05 [1] RSPM (R 4.2.0)
 textshaping            0.3.6       2021-10-13 [1] RSPM (R 4.2.0)
 TH.data                1.1-1       2022-04-26 [1] RSPM (R 4.2.0)
 tibble               * 3.2.1       2023-03-20 [1] RSPM (R 4.2.0)
 tidyr                * 1.3.0       2023-01-24 [1] RSPM (R 4.2.0)
 tidyselect             1.2.0       2022-10-10 [1] RSPM (R 4.2.0)
 tidyverse            * 2.0.0.9000  2023-04-30 [1] Github (tidyverse/tidyverse@8ec2e1f)
 timechange             0.2.0       2023-01-11 [1] RSPM (R 4.2.0)
 tzdb                   0.3.0       2022-03-28 [1] RSPM (R 4.2.0)
 utf8                   1.2.3       2023-01-31 [1] RSPM (R 4.2.0)
 uwot                   0.1.14      2022-08-22 [1] RSPM (R 4.2.0)
 vctrs                  0.6.2       2023-04-19 [1] RSPM (R 4.2.2)
 vipor                  0.4.5       2017-03-22 [1] RSPM (R 4.2.0)
 viridis              * 0.6.2       2021-10-13 [1] RSPM (R 4.2.0)
 viridisLite          * 0.4.1       2022-08-22 [1] RSPM (R 4.2.0)
 vroom                  1.6.1       2023-01-22 [1] RSPM (R 4.2.0)
 withr                  2.5.0       2022-03-03 [1] RSPM (R 4.2.0)
 xfun                   0.39        2023-04-20 [1] RSPM (R 4.2.2)
 xtable                 1.8-4       2019-04-21 [1] RSPM (R 4.2.0)
 XVector                0.38.0      2022-11-01 [1] RSPM (R 4.2.2)
 yaml                   2.3.7       2023-01-23 [1] RSPM (R 4.2.0)
 zeallot                0.1.0       2018-01-28 [1] RSPM (R 4.2.0)
 zlibbioc               1.44.0      2022-11-01 [1] RSPM (R 4.2.2)
 zoo                    1.8-12      2023-04-13 [1] RSPM (R 4.2.2)

 [1] /opt/R/4.2.2/lib/R/library

─ Python configuration ───────────────────────────────────────────────────────
 python:         /opt/python/3.8.8/bin/python
 libpython:      /opt/python/3.8.8/lib/libpython3.8.so
 pythonhome:     /opt/python/3.8.8:/opt/python/3.8.8
 version:        3.8.8 | packaged by conda-forge | (default, Feb 20 2021, 16:22:27)  [GCC 9.3.0]
 numpy:          /opt/python/3.8.8/lib/python3.8/site-packages/numpy
 numpy_version:  1.23.5
 scanpy:         /opt/python/3.8.8/lib/python3.8/site-packages/scanpy
 
 NOTE: Python version was forced by RETICULATE_PYTHON

──────────────────────────────────────────────────────────────────────────────