Detailed CellChat signalling analysis of Insr positive vs. Insr negative astrocytes

Author

Evgenii O. Tretiakov

Published

July 31, 2023

Load data and setup parameters

Code
# Load tidyverse infrastructure packages
suppressPackageStartupMessages({
  library(reticulate)
  library(future)
  library(here)
  library(tidyverse)
  library(magrittr)
  library(stringr)
  library(skimr)
  library(RColorBrewer)
})


# Load packages for scRNA-seq analysis and visualisation
suppressPackageStartupMessages({
  library(ggplot2)
  library(ggrepel)
  library(cowplot)
  library(patchwork)
  library(ggstatsplot)
  library(ggupset)
  library(ComplexUpset)
  library(dabestr)
})

Set paths

Code
src_dir <- here("code")
data_dir <- here("data")
output_dir <- here("output")
plots_dir <- here(output_dir, "figures")
tables_dir <- here(output_dir, "tables")

Load helper functions and gene-sets

Code
source(here(src_dir, "genes.R"))
source(here(src_dir, "functions.R"))

Set fixed variables

Code
# set seed
reseed <- 42
set.seed(seed = reseed)

# Parameters for parallel execution
n_cores <- 8
plan("multisession", workers = n_cores)
options(
  future.globals.maxSize = 100000 * 1024^2,
  future.rng.onMisuse = "ignore"
)
plan()
multisession:
- args: function (..., workers = 8, envir = parent.frame())
- tweaked: TRUE
- call: plan("multisession", workers = n_cores)
Code
# ggplot2 theme
theme_set(ggmin::theme_powerpoint())

Load selected astrocytes signalling data estimated using Liana-py and Rank Aggregation method (including CellPhoneDB, CellChat, ICELLNET, connectomeDB2020, and CellTalkDB) for CellChatDB only

Code
lutomska_norm <-
  read_csv(here(
    data_dir,
    "signalling", "lutomska",
    "arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

lutomska_hfhsd <-
  read_csv(here(
    data_dir,
    "signalling", "lutomska",
    "hfd-arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

deng_norm <-
  read_csv(here(
    data_dir,
    "signalling", "deng",
    "arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

deng_hfd <-
  read_csv(here(
    data_dir,
    "signalling", "deng",
    "hfd-arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

# lopez_norm <-
#   read_csv(here(
#     data_dir,
#     "signalling", "lopez",
#     "pvn_astro-with_npy_pomc-liana-res.csv"
#   ))
# 
# lopez_csds <-
#   read_csv(here(
#     data_dir,
#     "signalling", "lopez",
#     "chronic-stress-pvn_astro-with_npy_pomc-liana-res.csv"
#   ))
Code
skim(lutomska_norm)
Data summary
Name lutomska_norm
Number of rows 18768
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 267 0
receptor_complex 0 1 3 19 0 284 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9383.50 5418.00 0.00 4691.75 9383.50 14075.25 18767.00 ▇▇▇▇▇
lr_means 0 1 0.11 0.28 0.02 0.02 0.02 0.02 3.69 ▇▁▁▁▁
cellphone_pvals 0 1 0.86 0.33 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
expr_prod 0 1 0.06 0.49 0.00 0.00 0.00 0.00 13.31 ▇▁▁▁▁
scaled_weight 0 1 -0.94 0.67 -1.27 -1.27 -1.27 -1.27 4.34 ▇▂▁▁▁
lr_logfc 0 1 -1.74 0.92 -2.21 -2.21 -2.21 -2.21 3.41 ▇▁▁▁▁
spec_weight 0 1 0.02 0.07 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
lrscore 0 1 0.22 0.21 0.12 0.12 0.12 0.12 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.32 ▇▁▁▁▁
cellchat_pvals 0 1 0.95 0.21 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.80 0.39 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.81 0.38 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.91 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(lutomska_hfhsd)
Data summary
Name lutomska_hfhsd
Number of rows 19936
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 304 0
receptor_complex 0 1 3 19 0 320 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9967.50 5755.17 0.00 4983.75 9967.50 14951.25 19935.00 ▇▇▇▇▇
lr_means 0 1 0.10 0.26 0.02 0.02 0.02 0.02 3.71 ▇▁▁▁▁
cellphone_pvals 0 1 0.88 0.32 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
expr_prod 0 1 0.05 0.44 0.00 0.00 0.00 0.00 13.41 ▇▁▁▁▁
scaled_weight 0 1 -1.14 0.70 -1.45 -1.45 -1.45 -1.45 4.60 ▇▂▁▁▁
lr_logfc 0 1 -1.88 0.91 -2.29 -2.29 -2.29 -2.29 4.02 ▇▂▁▁▁
spec_weight 0 1 0.02 0.07 0.00 0.00 0.00 0.00 0.97 ▇▁▁▁▁
lrscore 0 1 0.23 0.19 0.14 0.14 0.14 0.14 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.33 ▇▁▁▁▁
cellchat_pvals 0 1 0.96 0.20 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.83 0.37 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.84 0.36 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.91 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(deng_norm)
Data summary
Name deng_norm
Number of rows 19696
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 290 0
receptor_complex 0 1 3 19 0 314 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9847.50 5685.89 0.00 4923.75 9847.50 14771.25 19695.00 ▇▇▇▇▇
lr_means 0 1 0.12 0.29 0.01 0.01 0.01 0.09 3.71 ▇▁▁▁▁
cellphone_pvals 0 1 0.83 0.35 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
expr_prod 0 1 0.06 0.50 0.00 0.00 0.00 0.00 13.45 ▇▁▁▁▁
scaled_weight 0 1 -0.89 0.68 -1.30 -1.30 -1.30 -0.21 2.35 ▇▂▁▁▁
lr_logfc 0 1 -2.18 1.38 -3.05 -3.05 -3.05 -0.44 4.22 ▇▁▂▁▁
spec_weight 0 1 0.03 0.06 0.00 0.00 0.00 0.02 0.91 ▇▁▁▁▁
lrscore 0 1 0.20 0.23 0.07 0.07 0.07 0.29 0.95 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.32 ▇▁▁▁▁
cellchat_pvals 0 1 0.92 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.76 0.41 0.00 0.32 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.77 0.39 0.00 0.49 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.89 0.29 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(deng_hfd)
Data summary
Name deng_hfd
Number of rows 20208
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 298 0
receptor_complex 0 1 3 19 0 318 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 10103.50 5833.69 0.00 5051.75 10103.50 15155.25 20207.00 ▇▇▇▇▇
lr_means 0 1 0.12 0.29 0.02 0.02 0.02 0.09 3.75 ▇▁▁▁▁
cellphone_pvals 0 1 0.83 0.35 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
expr_prod 0 1 0.07 0.53 0.00 0.00 0.00 0.00 13.79 ▇▁▁▁▁
scaled_weight 0 1 -0.71 0.57 -1.05 -1.05 -1.05 -0.19 3.42 ▇▂▁▁▁
lr_logfc 0 1 -1.44 0.91 -2.01 -2.01 -2.01 -0.41 2.46 ▇▁▂▁▁
spec_weight 0 1 0.03 0.06 0.00 0.00 0.00 0.02 0.81 ▇▁▁▁▁
lrscore 0 1 0.21 0.22 0.09 0.09 0.09 0.28 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.02 0.00 0.00 0.00 0.00 0.39 ▇▁▁▁▁
cellchat_pvals 0 1 0.93 0.24 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.75 0.40 0.00 0.32 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.77 0.39 0.00 0.50 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.89 0.28 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
# skim(lopez_norm)
Code
# skim(lopez_csds)

Filter by p-values

Code
lutomska_norm <-
  lutomska_norm |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

lutomska_hfhsd <-
  lutomska_hfhsd |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

deng_norm <-
  deng_norm |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

deng_hfd <-
  deng_hfd |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

# lopez_norm <-
#   lopez_norm |>
#   filter(
#     specificity_rank <= 0.05,
#     magnitude_rank <= 0.05
#   )
# 
# lopez_csds <-
#   lopez_csds |>
#   filter(
#     specificity_rank <= 0.05,
#     magnitude_rank <= 0.05
#   )
Code
skim(lutomska_norm)
Data summary
Name lutomska_norm
Number of rows 18768
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 267 0
receptor_complex 0 1 3 19 0 284 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9383.50 5418.00 0.00 4691.75 9383.50 14075.25 18767.00 ▇▇▇▇▇
lr_means 0 1 0.11 0.28 0.02 0.02 0.02 0.02 3.69 ▇▁▁▁▁
cellphone_pvals 0 1 0.86 0.33 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
expr_prod 0 1 0.06 0.49 0.00 0.00 0.00 0.00 13.31 ▇▁▁▁▁
scaled_weight 0 1 -0.94 0.67 -1.27 -1.27 -1.27 -1.27 4.34 ▇▂▁▁▁
lr_logfc 0 1 -1.74 0.92 -2.21 -2.21 -2.21 -2.21 3.41 ▇▁▁▁▁
spec_weight 0 1 0.02 0.07 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
lrscore 0 1 0.22 0.21 0.12 0.12 0.12 0.12 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.32 ▇▁▁▁▁
cellchat_pvals 0 1 0.95 0.21 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.80 0.39 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.81 0.38 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.91 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(lutomska_hfhsd)
Data summary
Name lutomska_hfhsd
Number of rows 19936
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 304 0
receptor_complex 0 1 3 19 0 320 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9967.50 5755.17 0.00 4983.75 9967.50 14951.25 19935.00 ▇▇▇▇▇
lr_means 0 1 0.10 0.26 0.02 0.02 0.02 0.02 3.71 ▇▁▁▁▁
cellphone_pvals 0 1 0.88 0.32 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
expr_prod 0 1 0.05 0.44 0.00 0.00 0.00 0.00 13.41 ▇▁▁▁▁
scaled_weight 0 1 -1.14 0.70 -1.45 -1.45 -1.45 -1.45 4.60 ▇▂▁▁▁
lr_logfc 0 1 -1.88 0.91 -2.29 -2.29 -2.29 -2.29 4.02 ▇▂▁▁▁
spec_weight 0 1 0.02 0.07 0.00 0.00 0.00 0.00 0.97 ▇▁▁▁▁
lrscore 0 1 0.23 0.19 0.14 0.14 0.14 0.14 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.33 ▇▁▁▁▁
cellchat_pvals 0 1 0.96 0.20 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.83 0.37 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.84 0.36 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.91 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(deng_norm)
Data summary
Name deng_norm
Number of rows 19696
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 290 0
receptor_complex 0 1 3 19 0 314 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 9847.50 5685.89 0.00 4923.75 9847.50 14771.25 19695.00 ▇▇▇▇▇
lr_means 0 1 0.12 0.29 0.01 0.01 0.01 0.09 3.71 ▇▁▁▁▁
cellphone_pvals 0 1 0.83 0.35 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
expr_prod 0 1 0.06 0.50 0.00 0.00 0.00 0.00 13.45 ▇▁▁▁▁
scaled_weight 0 1 -0.89 0.68 -1.30 -1.30 -1.30 -0.21 2.35 ▇▂▁▁▁
lr_logfc 0 1 -2.18 1.38 -3.05 -3.05 -3.05 -0.44 4.22 ▇▁▂▁▁
spec_weight 0 1 0.03 0.06 0.00 0.00 0.00 0.02 0.91 ▇▁▁▁▁
lrscore 0 1 0.20 0.23 0.07 0.07 0.07 0.29 0.95 ▇▁▁▁▁
lr_probs 0 1 0.00 0.01 0.00 0.00 0.00 0.00 0.32 ▇▁▁▁▁
cellchat_pvals 0 1 0.92 0.26 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.76 0.41 0.00 0.32 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.77 0.39 0.00 0.49 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.89 0.29 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
skim(deng_hfd)
Data summary
Name deng_hfd
Number of rows 20208
Number of columns 17
_______________________
Column type frequency:
character 4
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
source 0 1 11 16 0 4 0
target 0 1 11 16 0 4 0
ligand_complex 0 1 2 16 0 298 0
receptor_complex 0 1 3 19 0 318 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1 10103.50 5833.69 0.00 5051.75 10103.50 15155.25 20207.00 ▇▇▇▇▇
lr_means 0 1 0.12 0.29 0.02 0.02 0.02 0.09 3.75 ▇▁▁▁▁
cellphone_pvals 0 1 0.83 0.35 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
expr_prod 0 1 0.07 0.53 0.00 0.00 0.00 0.00 13.79 ▇▁▁▁▁
scaled_weight 0 1 -0.71 0.57 -1.05 -1.05 -1.05 -0.19 3.42 ▇▂▁▁▁
lr_logfc 0 1 -1.44 0.91 -2.01 -2.01 -2.01 -0.41 2.46 ▇▁▂▁▁
spec_weight 0 1 0.03 0.06 0.00 0.00 0.00 0.02 0.81 ▇▁▁▁▁
lrscore 0 1 0.21 0.22 0.09 0.09 0.09 0.28 0.96 ▇▁▁▁▁
lr_probs 0 1 0.00 0.02 0.00 0.00 0.00 0.00 0.39 ▇▁▁▁▁
cellchat_pvals 0 1 0.93 0.24 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
steady_rank 0 1 0.75 0.40 0.00 0.32 1.00 1.00 1.00 ▂▁▁▁▇
specificity_rank 0 1 0.77 0.39 0.00 0.50 1.00 1.00 1.00 ▂▁▁▁▇
magnitude_rank 0 1 0.89 0.28 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
Code
# skim(lopez_norm)
Code
# skim(lopez_csds)

Merge datasets

Code
lutomska_norm <-
  lutomska_norm |>
  mutate(
    region = "ARC",
    condition = "HFHSD",
    control = TRUE
  )

lutomska_hfhsd <-
  lutomska_hfhsd |>
  mutate(
    region = "ARC",
    condition = "HFHSD",
    control = FALSE
  )

deng_norm <-
  deng_norm |>
  mutate(
    region = "ARC",
    condition = "HFD",
    control = TRUE
  )

deng_hfd <-
  deng_hfd |>
  mutate(
    region = "ARC",
    condition = "HFD",
    control = FALSE
  )

# lopez_norm <-
#   lopez_norm |>
#   mutate(
#     region = "PVN",
#     condition = "CSDS",
#     control = TRUE
#   )
# 
# lopez_csds <-
#   lopez_csds |>
#   mutate(
#     region = "PVN",
#     condition = "CSDS",
#     control = FALSE
#   )

df_complex_conditions <-
  bind_rows(
    # lopez_norm,
    # lopez_csds,
    lutomska_norm,
    lutomska_hfhsd,
    deng_norm,
    deng_hfd
  )
df_complex_conditions <-
  df_complex_conditions |>
  mutate(pairs = str_c(source, target, sep = "==>")) |>
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons",
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons",
      "POMC neurons==>Astro_INSR+",
      "AGRP/NPY neurons==>Astro_INSR+",
      "POMC neurons==>Astro_INSR-",
      "AGRP/NPY neurons==>Astro_INSR-"
    ),
    cellchat_pvals < 1, cellphone_pvals < 1
  )

Deng 2020

Signalling pairs

How do insuline receptor positive or negative astrocytes coupled with POMC neurons?

Code
df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df
Code
skim(df)
Data summary
Name df
Number of rows 126
Number of columns 6
_______________________
Column type frequency:
character 4
logical 1
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pairs 0 1 26 26 0 2 0
lr_complex 0 1 8 27 0 68 0
ligand_complex 0 1 3 7 0 20 0
receptor_complex 0 1 3 19 0 51 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 126

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
lr_probs 0 1 0.02 0.05 0 0 0.01 0.02 0.29 ▇▁▁▁▁
Code
df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>POMC neurons` - `Astro_INSR-==>POMC neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide
Code
skim(df_wide)
Data summary
Name df_wide
Number of rows 68
Number of columns 8
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 27 0 68 0
ligand_complex 0 1 3 7 0 20 0
receptor_complex 0 1 3 19 0 51 0
diff_category 0 1 14 16 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 68

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Astro_INSR+==>POMC neurons 8 0.88 0.02 0.05 0 0 0.01 0.02 0.29 ▇▁▁▁▁
Astro_INSR-==>POMC neurons 2 0.97 0.02 0.05 0 0 0.01 0.02 0.29 ▇▁▁▁▁
diff_rank 10 0.85 0.00 0.00 0 0 0.00 0.00 0.01 ▇▁▁▁▁
Code
# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>POMC neurons`, `Astro_INSR-==>POMC neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long
Code
skim(df_long)
Data summary
Name df_long
Number of rows 136
Number of columns 9
_______________________
Column type frequency:
character 5
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 27 0 68 0
ligand_complex 0 1 3 7 0 20 0
receptor_complex 0 1 3 19 0 51 0
diff_category 0 1 14 16 0 2 0
pairs 0 1 26 26 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 136

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank 20 0.85 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▁▁▁▁
lr_probs 10 0.93 0.02 0.05 0.00 0.00 0.01 0.02 0.29 ▇▁▁▁▁
neg_log10_specificity_rank 10 0.93 2.62 1.15 1.19 1.52 2.57 3.27 6.56 ▇▆▃▁▁
Code
# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10
Code
skim(top_bot_10)
Data summary
Name top_bot_10
Number of rows 79
Number of columns 9
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables pairs

Variable type: character

skim_variable pairs n_missing complete_rate min max empty n_unique whitespace
lr_complex Astro_INSR+==>POMC neurons 0 1 8 18 0 37 0
lr_complex Astro_INSR-==>POMC neurons 0 1 8 18 0 42 0
ligand_complex Astro_INSR+==>POMC neurons 0 1 3 7 0 17 0
ligand_complex Astro_INSR-==>POMC neurons 0 1 3 7 0 16 0
receptor_complex Astro_INSR+==>POMC neurons 0 1 3 11 0 29 0
receptor_complex Astro_INSR-==>POMC neurons 0 1 3 11 0 32 0
diff_category Astro_INSR+==>POMC neurons 0 1 14 16 0 2 0
diff_category Astro_INSR-==>POMC neurons 0 1 14 16 0 2 0

Variable type: logical

skim_variable pairs n_missing complete_rate mean count
control Astro_INSR+==>POMC neurons 0 1 1 TRU: 37
control Astro_INSR-==>POMC neurons 0 1 1 TRU: 42

Variable type: numeric

skim_variable pairs n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank Astro_INSR+==>POMC neurons 4 0.89 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▁▁▁▁
diff_rank Astro_INSR-==>POMC neurons 9 0.79 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▁▁▁▁
lr_probs Astro_INSR+==>POMC neurons 2 0.95 0.03 0.06 0.00 0.00 0.01 0.04 0.29 ▇▁▁▁▁
lr_probs Astro_INSR-==>POMC neurons 1 0.98 0.03 0.06 0.00 0.00 0.01 0.04 0.29 ▇▁▁▁▁
neg_log10_specificity_rank Astro_INSR+==>POMC neurons 2 0.95 2.97 1.15 1.26 2.25 2.93 3.36 6.56 ▅▇▃▁▁
neg_log10_specificity_rank Astro_INSR-==>POMC neurons 1 0.98 2.96 1.26 1.19 2.06 2.93 3.84 6.56 ▇▆▇▁▁
Code
# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code
pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code
plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>POMC neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>POMC neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

How do insuline receptor positive or negative astrocytes coupled with AGRP/NPY neurons?

Code
df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df
Code
skim(df)
Data summary
Name df
Number of rows 129
Number of columns 6
_______________________
Column type frequency:
character 4
logical 1
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pairs 0 1 30 30 0 2 0
lr_complex 0 1 8 27 0 69 0
ligand_complex 0 1 3 7 0 22 0
receptor_complex 0 1 3 19 0 53 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 129

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
lr_probs 0 1 0.03 0.05 0 0 0.01 0.02 0.32 ▇▁▁▁▁
Code
df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>AGRP/NPY neurons` - `Astro_INSR-==>AGRP/NPY neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide
Code
skim(df_wide)
Data summary
Name df_wide
Number of rows 69
Number of columns 8
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 27 0 69 0
ligand_complex 0 1 3 7 0 22 0
receptor_complex 0 1 3 19 0 53 0
diff_category 0 1 14 16 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 69

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Astro_INSR+==>AGRP/NPY neurons 7 0.90 0.03 0.05 0 0 0.01 0.02 0.32 ▇▁▁▁▁
Astro_INSR-==>AGRP/NPY neurons 2 0.97 0.03 0.05 0 0 0.01 0.02 0.31 ▇▁▁▁▁
diff_rank 9 0.87 0.00 0.00 0 0 0.00 0.00 0.01 ▇▁▁▁▁
Code
# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>AGRP/NPY neurons`, `Astro_INSR-==>AGRP/NPY neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long
Code
skim(df_long)
Data summary
Name df_long
Number of rows 138
Number of columns 9
_______________________
Column type frequency:
character 5
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 27 0 69 0
ligand_complex 0 1 3 7 0 22 0
receptor_complex 0 1 3 19 0 53 0
diff_category 0 1 14 16 0 2 0
pairs 0 1 30 30 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 138

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank 18 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▁▁▁▁
lr_probs 9 0.93 0.03 0.05 0.00 0.00 0.01 0.02 0.32 ▇▁▁▁▁
neg_log10_specificity_rank 9 0.93 2.93 1.29 1.24 2.05 2.58 3.62 6.56 ▇▆▃▂▁
Code
# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10
Code
skim(top_bot_10)
Data summary
Name top_bot_10
Number of rows 75
Number of columns 9
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables pairs

Variable type: character

skim_variable pairs n_missing complete_rate min max empty n_unique whitespace
lr_complex Astro_INSR+==>AGRP/NPY neurons 0 1 8 27 0 35 0
lr_complex Astro_INSR-==>AGRP/NPY neurons 0 1 8 27 0 40 0
ligand_complex Astro_INSR+==>AGRP/NPY neurons 0 1 3 7 0 16 0
ligand_complex Astro_INSR-==>AGRP/NPY neurons 0 1 3 7 0 17 0
receptor_complex Astro_INSR+==>AGRP/NPY neurons 0 1 3 19 0 27 0
receptor_complex Astro_INSR-==>AGRP/NPY neurons 0 1 3 19 0 29 0
diff_category Astro_INSR+==>AGRP/NPY neurons 0 1 14 16 0 2 0
diff_category Astro_INSR-==>AGRP/NPY neurons 0 1 14 16 0 2 0

Variable type: logical

skim_variable pairs n_missing complete_rate mean count
control Astro_INSR+==>AGRP/NPY neurons 0 1 1 TRU: 35
control Astro_INSR-==>AGRP/NPY neurons 0 1 1 TRU: 40

Variable type: numeric

skim_variable pairs n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank Astro_INSR+==>AGRP/NPY neurons 4 0.89 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▂▁▁▁
diff_rank Astro_INSR-==>AGRP/NPY neurons 8 0.80 0.00 0.00 0.00 0.00 0.00 0.00 0.01 ▇▂▁▁▁
lr_probs Astro_INSR+==>AGRP/NPY neurons 2 0.94 0.05 0.07 0.00 0.00 0.02 0.05 0.32 ▇▁▁▁▁
lr_probs Astro_INSR-==>AGRP/NPY neurons 1 0.98 0.04 0.06 0.00 0.00 0.02 0.05 0.31 ▇▁▁▁▁
neg_log10_specificity_rank Astro_INSR+==>AGRP/NPY neurons 2 0.94 3.43 1.27 1.31 2.59 3.18 4.24 6.56 ▅▇▃▂▂
neg_log10_specificity_rank Astro_INSR-==>AGRP/NPY neurons 1 0.98 3.39 1.34 1.24 2.25 3.31 4.23 6.56 ▇▇▇▃▂
Code
# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code
pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code
plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

Lutomska 2022

Signalling pairs

How do insuline receptor positive or negative astrocytes coupled with POMC neurons?

Code
df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df
Code
skim(df)
Data summary
Name df
Number of rows 143
Number of columns 6
_______________________
Column type frequency:
character 4
logical 1
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pairs 0 1 26 26 0 2 0
lr_complex 0 1 8 20 0 90 0
ligand_complex 0 1 3 7 0 30 0
receptor_complex 0 1 3 13 0 58 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 143

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
lr_probs 0 1 0.02 0.05 0 0 0.01 0.02 0.31 ▇▁▁▁▁
Code
df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>POMC neurons` - `Astro_INSR-==>POMC neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide
Code
skim(df_wide)
Data summary
Name df_wide
Number of rows 90
Number of columns 8
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 20 0 90 0
ligand_complex 0 1 3 7 0 30 0
receptor_complex 0 1 3 13 0 58 0
diff_category 0 1 14 16 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 90

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Astro_INSR+==>POMC neurons 0 1.00 0.02 0.05 0 0 0.01 0.02 0.31 ▇▁▁▁▁
Astro_INSR-==>POMC neurons 37 0.59 0.03 0.05 0 0 0.01 0.02 0.30 ▇▁▁▁▁
diff_rank 37 0.59 0.01 0.01 0 0 0.00 0.01 0.03 ▇▅▃▁▁
Code
# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>POMC neurons`, `Astro_INSR-==>POMC neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long
Code
skim(df_long)
Data summary
Name df_long
Number of rows 180
Number of columns 9
_______________________
Column type frequency:
character 5
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 20 0 90 0
ligand_complex 0 1 3 7 0 30 0
receptor_complex 0 1 3 13 0 58 0
diff_category 0 1 14 16 0 2 0
pairs 0 1 26 26 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 180

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank 74 0.59 0.01 0.01 0.00 0.00 0.00 0.01 0.03 ▇▅▃▁▁
lr_probs 37 0.79 0.02 0.05 0.00 0.00 0.01 0.02 0.31 ▇▁▁▁▁
neg_log10_specificity_rank 37 0.79 3.41 1.29 1.69 2.28 3.32 4.42 7.13 ▇▃▆▂▁
Code
# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10
Code
skim(top_bot_10)
Data summary
Name top_bot_10
Number of rows 68
Number of columns 9
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables pairs

Variable type: character

skim_variable pairs n_missing complete_rate min max empty n_unique whitespace
lr_complex Astro_INSR+==>POMC neurons 0 1 8 19 0 35 0
lr_complex Astro_INSR-==>POMC neurons 0 1 8 19 0 33 0
ligand_complex Astro_INSR+==>POMC neurons 0 1 3 6 0 16 0
ligand_complex Astro_INSR-==>POMC neurons 0 1 3 6 0 14 0
receptor_complex Astro_INSR+==>POMC neurons 0 1 3 11 0 27 0
receptor_complex Astro_INSR-==>POMC neurons 0 1 3 11 0 25 0
diff_category Astro_INSR+==>POMC neurons 0 1 14 16 0 2 0
diff_category Astro_INSR-==>POMC neurons 0 1 14 16 0 2 0

Variable type: logical

skim_variable pairs n_missing complete_rate mean count
control Astro_INSR+==>POMC neurons 0 1 1 TRU: 35
control Astro_INSR-==>POMC neurons 0 1 1 TRU: 33

Variable type: numeric

skim_variable pairs n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank Astro_INSR+==>POMC neurons 1 0.97 0.01 0.01 0.00 0.00 0.00 0.01 0.03 ▇▃▂▁▁
diff_rank Astro_INSR-==>POMC neurons 0 1.00 0.01 0.01 0.00 0.00 0.00 0.01 0.03 ▇▃▂▁▁
lr_probs Astro_INSR+==>POMC neurons 0 1.00 0.05 0.07 0.00 0.00 0.02 0.06 0.31 ▇▂▁▁▁
lr_probs Astro_INSR-==>POMC neurons 0 1.00 0.04 0.07 0.00 0.00 0.01 0.05 0.30 ▇▁▁▁▁
neg_log10_specificity_rank Astro_INSR+==>POMC neurons 0 1.00 3.34 1.40 1.69 2.17 2.86 4.49 7.13 ▇▃▃▂▁
neg_log10_specificity_rank Astro_INSR-==>POMC neurons 0 1.00 3.02 1.09 1.78 2.28 2.93 3.54 5.91 ▇▅▂▂▁
Code
# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code
pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code
plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>POMC neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>POMC neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

How do insuline receptor positive or negative astrocytes coupled with AGRP/NPY neurons?

Code
df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df
Code
skim(df)
Data summary
Name df
Number of rows 117
Number of columns 6
_______________________
Column type frequency:
character 4
logical 1
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pairs 0 1 30 30 0 2 0
lr_complex 0 1 8 20 0 75 0
ligand_complex 0 1 3 7 0 28 0
receptor_complex 0 1 3 13 0 49 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 117

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
lr_probs 0 1 0.02 0.04 0 0 0 0.02 0.26 ▇▁▁▁▁
Code
df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>AGRP/NPY neurons` - `Astro_INSR-==>AGRP/NPY neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide
Code
skim(df_wide)
Data summary
Name df_wide
Number of rows 75
Number of columns 8
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 20 0 75 0
ligand_complex 0 1 3 7 0 28 0
receptor_complex 0 1 3 13 0 49 0
diff_category 0 1 14 16 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 75

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Astro_INSR+==>AGRP/NPY neurons 0 1.00 0.02 0.04 0 0 0.00 0.02 0.26 ▇▁▁▁▁
Astro_INSR-==>AGRP/NPY neurons 33 0.56 0.02 0.04 0 0 0.01 0.01 0.21 ▇▁▁▁▁
diff_rank 33 0.56 0.00 0.00 0 0 0.00 0.01 0.02 ▇▃▂▁▁
Code
# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>AGRP/NPY neurons`, `Astro_INSR-==>AGRP/NPY neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long
Code
skim(df_long)
Data summary
Name df_long
Number of rows 150
Number of columns 9
_______________________
Column type frequency:
character 5
logical 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
lr_complex 0 1 8 20 0 75 0
ligand_complex 0 1 3 7 0 28 0
receptor_complex 0 1 3 13 0 49 0
diff_category 0 1 14 16 0 2 0
pairs 0 1 30 30 0 2 0

Variable type: logical

skim_variable n_missing complete_rate mean count
control 0 1 1 TRU: 150

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank 66 0.56 0.00 0.00 0.00 0.00 0.0 0.01 0.02 ▇▃▂▁▁
lr_probs 33 0.78 0.02 0.04 0.00 0.00 0.0 0.02 0.26 ▇▁▁▁▁
neg_log10_specificity_rank 33 0.78 3.18 1.21 1.72 2.15 3.2 3.65 7.13 ▇▇▂▁▁
Code
# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10
Code
skim(top_bot_10)
Data summary
Name top_bot_10
Number of rows 58
Number of columns 9
_______________________
Column type frequency:
character 4
logical 1
numeric 3
________________________
Group variables pairs

Variable type: character

skim_variable pairs n_missing complete_rate min max empty n_unique whitespace
lr_complex Astro_INSR+==>AGRP/NPY neurons 0 1 8 19 0 31 0
lr_complex Astro_INSR-==>AGRP/NPY neurons 0 1 8 19 0 27 0
ligand_complex Astro_INSR+==>AGRP/NPY neurons 0 1 3 6 0 16 0
ligand_complex Astro_INSR-==>AGRP/NPY neurons 0 1 3 6 0 15 0
receptor_complex Astro_INSR+==>AGRP/NPY neurons 0 1 3 11 0 19 0
receptor_complex Astro_INSR-==>AGRP/NPY neurons 0 1 3 11 0 17 0
diff_category Astro_INSR+==>AGRP/NPY neurons 0 1 14 16 0 2 0
diff_category Astro_INSR-==>AGRP/NPY neurons 0 1 14 16 0 2 0

Variable type: logical

skim_variable pairs n_missing complete_rate mean count
control Astro_INSR+==>AGRP/NPY neurons 0 1 1 TRU: 31
control Astro_INSR-==>AGRP/NPY neurons 0 1 1 TRU: 27

Variable type: numeric

skim_variable pairs n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
diff_rank Astro_INSR+==>AGRP/NPY neurons 4 0.87 0.01 0.01 0.00 0.00 0.00 0.01 0.02 ▇▆▅▁▁
diff_rank Astro_INSR-==>AGRP/NPY neurons 0 1.00 0.01 0.01 0.00 0.00 0.00 0.01 0.02 ▇▆▅▁▁
lr_probs Astro_INSR+==>AGRP/NPY neurons 0 1.00 0.04 0.06 0.00 0.01 0.02 0.05 0.26 ▇▂▁▁▁
lr_probs Astro_INSR-==>AGRP/NPY neurons 0 1.00 0.03 0.04 0.00 0.00 0.01 0.03 0.21 ▇▂▁▁▁
neg_log10_specificity_rank Astro_INSR+==>AGRP/NPY neurons 0 1.00 3.23 1.37 1.72 2.02 3.20 3.78 7.13 ▇▇▃▁▁
neg_log10_specificity_rank Astro_INSR-==>AGRP/NPY neurons 0 1.00 3.05 1.12 1.84 2.27 3.04 3.26 6.87 ▇▇▁▁▁
Code
# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code
pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code
plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

Session info

Code
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.3 (2023-03-15)
 os       macOS 14.0
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Vienna
 date     2023-07-31
 pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package          * version    date (UTC) lib source
 base64enc          0.1-3      2015-07-28 [1] CRAN (R 4.2.0)
 bayestestR         0.13.1     2023-04-07 [1] RSPM (R 4.2.3)
 bit                4.0.5      2022-11-15 [1] CRAN (R 4.2.0)
 bit64              4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
 boot               1.3-28.1   2022-11-22 [1] CRAN (R 4.2.3)
 cli                3.6.1      2023-03-23 [1] RSPM (R 4.2.3)
 coda               0.19-4     2020-09-30 [1] CRAN (R 4.2.0)
 codetools          0.2-19     2023-02-01 [1] CRAN (R 4.2.3)
 colorspace         2.1-0      2023-01-23 [1] CRAN (R 4.2.0)
 ComplexUpset     * 1.3.5      2022-11-28 [1] Github (krassowski/complex-upset@bb3f10a)
 correlation        0.8.4      2023-04-06 [1] RSPM (R 4.2.3)
 cowplot          * 1.1.1      2020-12-30 [1] CRAN (R 4.2.0)
 crayon             1.5.2      2022-09-29 [1] CRAN (R 4.2.0)
 dabestr          * 0.3.0      2022-05-21 [1] Github (ACCLAB/dabestr@8775899)
 datawizard         0.7.1      2023-04-03 [1] RSPM (R 4.2.3)
 digest             0.6.31     2022-12-11 [1] CRAN (R 4.2.0)
 dplyr            * 1.1.2      2023-04-20 [1] RSPM (R 4.2.3)
 emmeans            1.8.5      2023-03-08 [1] RSPM (R 4.2.3)
 estimability       1.4.1      2022-08-05 [1] CRAN (R 4.2.0)
 evaluate           0.21       2023-05-05 [1] RSPM (R 4.2.3)
 fansi              1.0.4      2023-01-22 [1] CRAN (R 4.2.0)
 farver             2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
 fastmap            1.1.1      2023-02-24 [1] RSPM (R 4.2.3)
 forcats          * 1.0.0      2023-01-29 [1] CRAN (R 4.2.0)
 future           * 1.33.0     2023-07-01 [1] RSPM (R 4.2.3)
 generics           0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
 ggmin              0.0.0.9000 2022-08-12 [1] Github (sjessa/ggmin@8ada274)
 ggplot2          * 3.4.2      2023-04-03 [1] RSPM (R 4.2.3)
 ggrepel          * 0.9.3      2023-02-03 [1] RSPM (R 4.2.3)
 ggstatsplot      * 0.11.1     2023-04-14 [1] RSPM (R 4.2.3)
 ggupset          * 0.3.0.9002 2022-05-21 [1] Github (const-ae/ggupset@2a03c58)
 globals            0.16.2     2022-11-21 [1] CRAN (R 4.2.0)
 glue               1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
 gtable             0.3.3      2023-03-21 [1] RSPM (R 4.2.3)
 here             * 1.0.1      2020-12-13 [1] CRAN (R 4.2.0)
 hms                1.1.3      2023-03-21 [1] RSPM (R 4.2.3)
 htmltools          0.5.5      2023-03-23 [1] RSPM (R 4.2.3)
 htmlwidgets        1.6.2      2023-03-17 [1] RSPM (R 4.2.3)
 insight            0.19.1     2023-03-18 [1] RSPM (R 4.2.3)
 jsonlite           1.8.4      2022-12-06 [1] CRAN (R 4.2.0)
 knitr              1.42       2023-01-25 [1] RSPM
 labeling           0.4.2      2020-10-20 [1] CRAN (R 4.2.0)
 lattice            0.21-8     2023-04-05 [1] RSPM (R 4.2.3)
 lifecycle          1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
 listenv            0.9.0      2022-12-16 [1] CRAN (R 4.2.0)
 lubridate        * 1.9.2      2023-02-10 [1] RSPM (R 4.2.3)
 magrittr         * 2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
 MASS               7.3-58.3   2023-03-07 [1] RSPM (R 4.2.3)
 Matrix             1.5-4      2023-04-04 [1] RSPM (R 4.2.3)
 multcomp           1.4-23     2023-03-09 [1] RSPM (R 4.2.3)
 munsell            0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
 mvtnorm            1.1-3      2021-10-08 [1] CRAN (R 4.2.0)
 paletteer          1.5.0      2022-10-19 [1] CRAN (R 4.2.0)
 parallelly         1.36.0     2023-05-26 [1] RSPM (R 4.2.3)
 parameters         0.21.0     2023-04-19 [1] RSPM (R 4.2.3)
 patchwork        * 1.1.2.9000 2023-02-08 [1] Github (thomasp85/patchwork@c14c960)
 pillar             1.9.0      2023-03-22 [1] RSPM (R 4.2.3)
 pkgconfig          2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
 png                0.1-8      2022-11-29 [1] CRAN (R 4.2.0)
 purrr            * 1.0.1      2023-01-10 [1] CRAN (R 4.2.0)
 R6                 2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
 RColorBrewer     * 1.1-3      2022-04-03 [1] CRAN (R 4.2.0)
 Rcpp               1.0.10     2023-01-22 [1] CRAN (R 4.2.0)
 readr            * 2.1.4      2023-02-10 [1] RSPM (R 4.2.3)
 rematch2           2.1.2      2020-05-01 [1] CRAN (R 4.2.0)
 repr               1.1.6      2023-01-26 [1] CRAN (R 4.2.0)
 reticulate       * 1.30-9000  2023-07-30 [1] Github (rstudio/reticulate@9a1ed79)
 rlang              1.1.0      2023-03-14 [1] RSPM (R 4.2.3)
 rmarkdown          2.21       2023-03-26 [1] RSPM (R 4.2.3)
 rprojroot          2.0.3      2022-04-02 [1] CRAN (R 4.2.0)
 rstudioapi         0.14       2022-08-22 [1] CRAN (R 4.2.0)
 sandwich           3.0-2      2022-06-15 [1] CRAN (R 4.2.0)
 scales             1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
 sessioninfo        1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
 skimr            * 2.1.5      2023-02-08 [1] Github (ropensci/skimr@d5126aa)
 statsExpressions   1.5.0      2023-02-19 [1] RSPM (R 4.2.3)
 stringi            1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
 stringr          * 1.5.0      2022-12-02 [1] CRAN (R 4.2.0)
 survival           3.5-5      2023-03-12 [1] RSPM (R 4.2.3)
 TH.data            1.1-2      2023-04-17 [1] RSPM (R 4.2.3)
 tibble           * 3.2.1      2023-03-20 [1] RSPM (R 4.2.3)
 tidyr            * 1.3.0      2023-01-24 [1] CRAN (R 4.2.0)
 tidyselect         1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
 tidyverse        * 2.0.0      2023-02-22 [1] RSPM (R 4.2.3)
 timechange         0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
 tzdb               0.3.0      2022-03-28 [1] CRAN (R 4.2.0)
 utf8               1.2.3      2023-01-31 [1] CRAN (R 4.2.0)
 vctrs              0.6.2      2023-04-19 [1] RSPM (R 4.2.3)
 vroom              1.6.1      2023-01-22 [1] CRAN (R 4.2.0)
 withr              2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
 xfun               0.39       2023-04-20 [1] RSPM (R 4.2.3)
 xtable             1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
 yaml               2.3.7      2023-01-23 [1] RSPM
 zeallot            0.1.0      2018-01-28 [1] CRAN (R 4.2.0)
 zoo                1.8-12     2023-04-13 [1] RSPM (R 4.2.3)

 [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library

──────────────────────────────────────────────────────────────────────────────