Detailed CellChat signalling analysis of Insr positive vs. Insr negative astrocytes

Author

Evgenii O. Tretiakov

Published

July 31, 2023

Load data and setup parameters

Code

# Load tidyverse infrastructure packages
suppressPackageStartupMessages({
  library(reticulate)
  library(future)
  library(here)
  library(tidyverse)
  library(magrittr)
  library(stringr)
  library(skimr)
  library(RColorBrewer)
})


# Load packages for scRNA-seq analysis and visualisation
suppressPackageStartupMessages({
  library(ggplot2)
  library(ggrepel)
  library(cowplot)
  library(patchwork)
  library(ggstatsplot)
  library(ggupset)
  library(ComplexUpset)
  library(dabestr)
})

Set paths

Code

src_dir <- here("code")
data_dir <- here("data")
output_dir <- here("output")
plots_dir <- here(output_dir, "figures")
tables_dir <- here(output_dir, "tables")

Load helper functions and gene-sets

Code

source(here(src_dir, "genes.R"))
source(here(src_dir, "functions.R"))

Set fixed variables

Code

# set seed
reseed <- 42
set.seed(seed = reseed)

# Parameters for parallel execution
n_cores <- 8
plan("multisession", workers = n_cores)
options(
  future.globals.maxSize = 100000 * 1024^2,
  future.rng.onMisuse = "ignore"
)
plan()

multisession:
- args: function (..., workers = 8, envir = parent.frame())
- tweaked: TRUE
- call: plan("multisession", workers = n_cores)

Code

# ggplot2 theme
theme_set(ggmin::theme_powerpoint())

Load selected astrocytes signalling data estimated using Liana-py and Rank Aggregation method (including CellPhoneDB, CellChat, ICELLNET, connectomeDB2020, and CellTalkDB) for CellChatDB only

Code

lutomska_norm <-
  read_csv(here(
    data_dir,
    "signalling", "lutomska",
    "arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

lutomska_hfhsd <-
  read_csv(here(
    data_dir,
    "signalling", "lutomska",
    "hfd-arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

deng_norm <-
  read_csv(here(
    data_dir,
    "signalling", "deng",
    "arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

deng_hfd <-
  read_csv(here(
    data_dir,
    "signalling", "deng",
    "hfd-arc_astro-with_npy_pomc-liana-res-cellchatdb.csv"
  ))

# lopez_norm <-
#   read_csv(here(
#     data_dir,
#     "signalling", "lopez",
#     "pvn_astro-with_npy_pomc-liana-res.csv"
#   ))
# 
# lopez_csds <-
#   read_csv(here(
#     data_dir,
#     "signalling", "lopez",
#     "chronic-stress-pvn_astro-with_npy_pomc-liana-res.csv"
#   ))

Code

skim(lutomska_norm)

Data summary
Name	lutomska_norm
Number of rows	18768
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	267
receptor_complex	1	3	19	284

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9383.50	5418.00	0.00	4691.75	9383.50	14075.25	18767.00	▇▇▇▇▇
lr_means	1	0.11	0.28	0.02	0.02	0.02	0.02	3.69	▇▁▁▁▁
cellphone_pvals	1	0.86	0.33	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
expr_prod	1	0.06	0.49	0.00	0.00	0.00	0.00	13.31	▇▁▁▁▁
scaled_weight	1	-0.94	0.67	-1.27	-1.27	-1.27	-1.27	4.34	▇▂▁▁▁
lr_logfc	1	-1.74	0.92	-2.21	-2.21	-2.21	-2.21	3.41	▇▁▁▁▁
spec_weight	1	0.02	0.07	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▁
lrscore	1	0.22	0.21	0.12	0.12	0.12	0.12	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.32	▇▁▁▁▁
cellchat_pvals	1	0.95	0.21	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.80	0.39	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.81	0.38	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.91	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(lutomska_hfhsd)

Data summary
Name	lutomska_hfhsd
Number of rows	19936
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	304
receptor_complex	1	3	19	320

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9967.50	5755.17	0.00	4983.75	9967.50	14951.25	19935.00	▇▇▇▇▇
lr_means	1	0.10	0.26	0.02	0.02	0.02	0.02	3.71	▇▁▁▁▁
cellphone_pvals	1	0.88	0.32	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
expr_prod	1	0.05	0.44	0.00	0.00	0.00	0.00	13.41	▇▁▁▁▁
scaled_weight	1	-1.14	0.70	-1.45	-1.45	-1.45	-1.45	4.60	▇▂▁▁▁
lr_logfc	1	-1.88	0.91	-2.29	-2.29	-2.29	-2.29	4.02	▇▂▁▁▁
spec_weight	1	0.02	0.07	0.00	0.00	0.00	0.00	0.97	▇▁▁▁▁
lrscore	1	0.23	0.19	0.14	0.14	0.14	0.14	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.33	▇▁▁▁▁
cellchat_pvals	1	0.96	0.20	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.83	0.37	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.84	0.36	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.91	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(deng_norm)

Data summary
Name	deng_norm
Number of rows	19696
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	290
receptor_complex	1	3	19	314

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9847.50	5685.89	0.00	4923.75	9847.50	14771.25	19695.00	▇▇▇▇▇
lr_means	1	0.12	0.29	0.01	0.01	0.01	0.09	3.71	▇▁▁▁▁
cellphone_pvals	1	0.83	0.35	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
expr_prod	1	0.06	0.50	0.00	0.00	0.00	0.00	13.45	▇▁▁▁▁
scaled_weight	1	-0.89	0.68	-1.30	-1.30	-1.30	-0.21	2.35	▇▂▁▁▁
lr_logfc	1	-2.18	1.38	-3.05	-3.05	-3.05	-0.44	4.22	▇▁▂▁▁
spec_weight	1	0.03	0.06	0.00	0.00	0.00	0.02	0.91	▇▁▁▁▁
lrscore	1	0.20	0.23	0.07	0.07	0.07	0.29	0.95	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.32	▇▁▁▁▁
cellchat_pvals	1	0.92	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.76	0.41	0.00	0.32	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.77	0.39	0.00	0.49	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.89	0.29	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(deng_hfd)

Data summary
Name	deng_hfd
Number of rows	20208
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	298
receptor_complex	1	3	19	318

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	10103.50	5833.69	0.00	5051.75	10103.50	15155.25	20207.00	▇▇▇▇▇
lr_means	1	0.12	0.29	0.02	0.02	0.02	0.09	3.75	▇▁▁▁▁
cellphone_pvals	1	0.83	0.35	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
expr_prod	1	0.07	0.53	0.00	0.00	0.00	0.00	13.79	▇▁▁▁▁
scaled_weight	1	-0.71	0.57	-1.05	-1.05	-1.05	-0.19	3.42	▇▂▁▁▁
lr_logfc	1	-1.44	0.91	-2.01	-2.01	-2.01	-0.41	2.46	▇▁▂▁▁
spec_weight	1	0.03	0.06	0.00	0.00	0.00	0.02	0.81	▇▁▁▁▁
lrscore	1	0.21	0.22	0.09	0.09	0.09	0.28	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.02	0.00	0.00	0.00	0.00	0.39	▇▁▁▁▁
cellchat_pvals	1	0.93	0.24	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.75	0.40	0.00	0.32	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.77	0.39	0.00	0.50	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.89	0.28	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

# skim(lopez_norm)

Code

# skim(lopez_csds)

Filter by p-values

Code

lutomska_norm <-
  lutomska_norm |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

lutomska_hfhsd <-
  lutomska_hfhsd |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

deng_norm <-
  deng_norm |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

deng_hfd <-
  deng_hfd |>
  filter(
    specificity_rank <= 1,
    magnitude_rank <= 1
  )

# lopez_norm <-
#   lopez_norm |>
#   filter(
#     specificity_rank <= 0.05,
#     magnitude_rank <= 0.05
#   )
# 
# lopez_csds <-
#   lopez_csds |>
#   filter(
#     specificity_rank <= 0.05,
#     magnitude_rank <= 0.05
#   )

Code

skim(lutomska_norm)

Data summary
Name	lutomska_norm
Number of rows	18768
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	267
receptor_complex	1	3	19	284

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9383.50	5418.00	0.00	4691.75	9383.50	14075.25	18767.00	▇▇▇▇▇
lr_means	1	0.11	0.28	0.02	0.02	0.02	0.02	3.69	▇▁▁▁▁
cellphone_pvals	1	0.86	0.33	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
expr_prod	1	0.06	0.49	0.00	0.00	0.00	0.00	13.31	▇▁▁▁▁
scaled_weight	1	-0.94	0.67	-1.27	-1.27	-1.27	-1.27	4.34	▇▂▁▁▁
lr_logfc	1	-1.74	0.92	-2.21	-2.21	-2.21	-2.21	3.41	▇▁▁▁▁
spec_weight	1	0.02	0.07	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▁
lrscore	1	0.22	0.21	0.12	0.12	0.12	0.12	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.32	▇▁▁▁▁
cellchat_pvals	1	0.95	0.21	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.80	0.39	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.81	0.38	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.91	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(lutomska_hfhsd)

Data summary
Name	lutomska_hfhsd
Number of rows	19936
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	304
receptor_complex	1	3	19	320

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9967.50	5755.17	0.00	4983.75	9967.50	14951.25	19935.00	▇▇▇▇▇
lr_means	1	0.10	0.26	0.02	0.02	0.02	0.02	3.71	▇▁▁▁▁
cellphone_pvals	1	0.88	0.32	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
expr_prod	1	0.05	0.44	0.00	0.00	0.00	0.00	13.41	▇▁▁▁▁
scaled_weight	1	-1.14	0.70	-1.45	-1.45	-1.45	-1.45	4.60	▇▂▁▁▁
lr_logfc	1	-1.88	0.91	-2.29	-2.29	-2.29	-2.29	4.02	▇▂▁▁▁
spec_weight	1	0.02	0.07	0.00	0.00	0.00	0.00	0.97	▇▁▁▁▁
lrscore	1	0.23	0.19	0.14	0.14	0.14	0.14	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.33	▇▁▁▁▁
cellchat_pvals	1	0.96	0.20	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.83	0.37	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.84	0.36	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.91	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(deng_norm)

Data summary
Name	deng_norm
Number of rows	19696
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	290
receptor_complex	1	3	19	314

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	9847.50	5685.89	0.00	4923.75	9847.50	14771.25	19695.00	▇▇▇▇▇
lr_means	1	0.12	0.29	0.01	0.01	0.01	0.09	3.71	▇▁▁▁▁
cellphone_pvals	1	0.83	0.35	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
expr_prod	1	0.06	0.50	0.00	0.00	0.00	0.00	13.45	▇▁▁▁▁
scaled_weight	1	-0.89	0.68	-1.30	-1.30	-1.30	-0.21	2.35	▇▂▁▁▁
lr_logfc	1	-2.18	1.38	-3.05	-3.05	-3.05	-0.44	4.22	▇▁▂▁▁
spec_weight	1	0.03	0.06	0.00	0.00	0.00	0.02	0.91	▇▁▁▁▁
lrscore	1	0.20	0.23	0.07	0.07	0.07	0.29	0.95	▇▁▁▁▁
lr_probs	1	0.00	0.01	0.00	0.00	0.00	0.00	0.32	▇▁▁▁▁
cellchat_pvals	1	0.92	0.26	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.76	0.41	0.00	0.32	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.77	0.39	0.00	0.49	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.89	0.29	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

skim(deng_hfd)

Data summary
Name	deng_hfd
Number of rows	20208
Number of columns	17
_______________________
Column type frequency:
character	4
numeric	13
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
source	1	11	16	4
target	1	11	16	4
ligand_complex	1	2	16	298
receptor_complex	1	3	19	318

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
…1	1	10103.50	5833.69	0.00	5051.75	10103.50	15155.25	20207.00	▇▇▇▇▇
lr_means	1	0.12	0.29	0.02	0.02	0.02	0.09	3.75	▇▁▁▁▁
cellphone_pvals	1	0.83	0.35	0.00	1.00	1.00	1.00	1.00	▂▁▁▁▇
expr_prod	1	0.07	0.53	0.00	0.00	0.00	0.00	13.79	▇▁▁▁▁
scaled_weight	1	-0.71	0.57	-1.05	-1.05	-1.05	-0.19	3.42	▇▂▁▁▁
lr_logfc	1	-1.44	0.91	-2.01	-2.01	-2.01	-0.41	2.46	▇▁▂▁▁
spec_weight	1	0.03	0.06	0.00	0.00	0.00	0.02	0.81	▇▁▁▁▁
lrscore	1	0.21	0.22	0.09	0.09	0.09	0.28	0.96	▇▁▁▁▁
lr_probs	1	0.00	0.02	0.00	0.00	0.00	0.00	0.39	▇▁▁▁▁
cellchat_pvals	1	0.93	0.24	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
steady_rank	1	0.75	0.40	0.00	0.32	1.00	1.00	1.00	▂▁▁▁▇
specificity_rank	1	0.77	0.39	0.00	0.50	1.00	1.00	1.00	▂▁▁▁▇
magnitude_rank	1	0.89	0.28	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇

Code

# skim(lopez_norm)

Code

# skim(lopez_csds)

Merge datasets

Code

lutomska_norm <-
  lutomska_norm |>
  mutate(
    region = "ARC",
    condition = "HFHSD",
    control = TRUE
  )

lutomska_hfhsd <-
  lutomska_hfhsd |>
  mutate(
    region = "ARC",
    condition = "HFHSD",
    control = FALSE
  )

deng_norm <-
  deng_norm |>
  mutate(
    region = "ARC",
    condition = "HFD",
    control = TRUE
  )

deng_hfd <-
  deng_hfd |>
  mutate(
    region = "ARC",
    condition = "HFD",
    control = FALSE
  )

# lopez_norm <-
#   lopez_norm |>
#   mutate(
#     region = "PVN",
#     condition = "CSDS",
#     control = TRUE
#   )
# 
# lopez_csds <-
#   lopez_csds |>
#   mutate(
#     region = "PVN",
#     condition = "CSDS",
#     control = FALSE
#   )

df_complex_conditions <-
  bind_rows(
    # lopez_norm,
    # lopez_csds,
    lutomska_norm,
    lutomska_hfhsd,
    deng_norm,
    deng_hfd
  )
df_complex_conditions <-
  df_complex_conditions |>
  mutate(pairs = str_c(source, target, sep = "==>")) |>
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons",
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons",
      "POMC neurons==>Astro_INSR+",
      "AGRP/NPY neurons==>Astro_INSR+",
      "POMC neurons==>Astro_INSR-",
      "AGRP/NPY neurons==>Astro_INSR-"
    ),
    cellchat_pvals < 1, cellphone_pvals < 1
  )

Deng 2020

Signalling pairs

How do insuline receptor positive or negative astrocytes coupled with POMC neurons?

Code

df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df

Code

skim(df)

Data summary
Name	df
Number of rows	126
Number of columns	6
_______________________
Column type frequency:
character	4
logical	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
pairs	1	26	26	2
lr_complex	1	8	27	68
ligand_complex	1	3	7	20
receptor_complex	1	3	19	51

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 126

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
lr_probs	0	1	0.02	0.05	0	0	0.01	0.02	0.29	▇▁▁▁▁

Code

df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>POMC neurons` - `Astro_INSR-==>POMC neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide

Code

skim(df_wide)

Data summary
Name	df_wide
Number of rows	68
Number of columns	8
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	27	68
ligand_complex	1	3	7	20
receptor_complex	1	3	19	51
diff_category	1	14	16	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 68

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p50	p75	p100	hist
Astro_INSR+==>POMC neurons	8	0.88	0.02	0.05	0.01	0.02	0.29	▇▁▁▁▁
Astro_INSR-==>POMC neurons	2	0.97	0.02	0.05	0.01	0.02	0.29	▇▁▁▁▁
diff_rank	10	0.85	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁

Code

# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>POMC neurons`, `Astro_INSR-==>POMC neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long

Code

skim(df_long)

Data summary
Name	df_long
Number of rows	136
Number of columns	9
_______________________
Column type frequency:
character	5
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	27	68
ligand_complex	1	3	7	20
receptor_complex	1	3	19	51
diff_category	1	14	16	2
pairs	1	26	26	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 136

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	20	0.85	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁
lr_probs	10	0.93	0.02	0.05	0.00	0.00	0.01	0.02	0.29	▇▁▁▁▁
neg_log10_specificity_rank	10	0.93	2.62	1.15	1.19	1.52	2.57	3.27	6.56	▇▆▃▁▁

Code

# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10

Code

skim(top_bot_10)

Data summary
Name	top_bot_10
Number of rows	79
Number of columns	9
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	pairs

Variable type: character

skim_variable	pairs	complete_rate	min	max	n_unique
lr_complex	Astro_INSR+==>POMC neurons	1	8	18	37
lr_complex	Astro_INSR-==>POMC neurons	1	8	18	42
ligand_complex	Astro_INSR+==>POMC neurons	1	3	7	17
ligand_complex	Astro_INSR-==>POMC neurons	1	3	7	16
receptor_complex	Astro_INSR+==>POMC neurons	1	3	11	29
receptor_complex	Astro_INSR-==>POMC neurons	1	3	11	32
diff_category	Astro_INSR+==>POMC neurons	1	14	16	2
diff_category	Astro_INSR-==>POMC neurons	1	14	16	2

Variable type: logical

skim_variable	pairs	n_missing	complete_rate	mean	count
control	Astro_INSR+==>POMC neurons	0	1	1	TRU: 37
control	Astro_INSR-==>POMC neurons	0	1	1	TRU: 42

Variable type: numeric

skim_variable	pairs	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	Astro_INSR+==>POMC neurons	4	0.89	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁
diff_rank	Astro_INSR-==>POMC neurons	9	0.79	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁
lr_probs	Astro_INSR+==>POMC neurons	2	0.95	0.03	0.06	0.00	0.00	0.01	0.04	0.29	▇▁▁▁▁
lr_probs	Astro_INSR-==>POMC neurons	1	0.98	0.03	0.06	0.00	0.00	0.01	0.04	0.29	▇▁▁▁▁
neg_log10_specificity_rank	Astro_INSR+==>POMC neurons	2	0.95	2.97	1.15	1.26	2.25	2.93	3.36	6.56	▅▇▃▁▁
neg_log10_specificity_rank	Astro_INSR-==>POMC neurons	1	0.98	2.96	1.26	1.19	2.06	2.93	3.84	6.56	▇▆▇▁▁

Code

# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code

pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code

plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>POMC neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>POMC neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

How do insuline receptor positive or negative astrocytes coupled with AGRP/NPY neurons?

Code

df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df

Code

skim(df)

Data summary
Name	df
Number of rows	129
Number of columns	6
_______________________
Column type frequency:
character	4
logical	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
pairs	1	30	30	2
lr_complex	1	8	27	69
ligand_complex	1	3	7	22
receptor_complex	1	3	19	53

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 129

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
lr_probs	0	1	0.03	0.05	0	0	0.01	0.02	0.32	▇▁▁▁▁

Code

df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>AGRP/NPY neurons` - `Astro_INSR-==>AGRP/NPY neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide

Code

skim(df_wide)

Data summary
Name	df_wide
Number of rows	69
Number of columns	8
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	27	69
ligand_complex	1	3	7	22
receptor_complex	1	3	19	53
diff_category	1	14	16	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 69

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p50	p75	p100	hist
Astro_INSR+==>AGRP/NPY neurons	7	0.90	0.03	0.05	0.01	0.02	0.32	▇▁▁▁▁
Astro_INSR-==>AGRP/NPY neurons	2	0.97	0.03	0.05	0.01	0.02	0.31	▇▁▁▁▁
diff_rank	9	0.87	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁

Code

# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>AGRP/NPY neurons`, `Astro_INSR-==>AGRP/NPY neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long

Code

skim(df_long)

Data summary
Name	df_long
Number of rows	138
Number of columns	9
_______________________
Column type frequency:
character	5
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	27	69
ligand_complex	1	3	7	22
receptor_complex	1	3	19	53
diff_category	1	14	16	2
pairs	1	30	30	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 138

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	18	0.87	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▁▁▁▁
lr_probs	9	0.93	0.03	0.05	0.00	0.00	0.01	0.02	0.32	▇▁▁▁▁
neg_log10_specificity_rank	9	0.93	2.93	1.29	1.24	2.05	2.58	3.62	6.56	▇▆▃▂▁

Code

# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10

Code

skim(top_bot_10)

Data summary
Name	top_bot_10
Number of rows	75
Number of columns	9
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	pairs

Variable type: character

skim_variable	pairs	complete_rate	min	max	n_unique
lr_complex	Astro_INSR+==>AGRP/NPY neurons	1	8	27	35
lr_complex	Astro_INSR-==>AGRP/NPY neurons	1	8	27	40
ligand_complex	Astro_INSR+==>AGRP/NPY neurons	1	3	7	16
ligand_complex	Astro_INSR-==>AGRP/NPY neurons	1	3	7	17
receptor_complex	Astro_INSR+==>AGRP/NPY neurons	1	3	19	27
receptor_complex	Astro_INSR-==>AGRP/NPY neurons	1	3	19	29
diff_category	Astro_INSR+==>AGRP/NPY neurons	1	14	16	2
diff_category	Astro_INSR-==>AGRP/NPY neurons	1	14	16	2

Variable type: logical

skim_variable	pairs	n_missing	complete_rate	mean	count
control	Astro_INSR+==>AGRP/NPY neurons	0	1	1	TRU: 35
control	Astro_INSR-==>AGRP/NPY neurons	0	1	1	TRU: 40

Variable type: numeric

skim_variable	pairs	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	Astro_INSR+==>AGRP/NPY neurons	4	0.89	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▂▁▁▁
diff_rank	Astro_INSR-==>AGRP/NPY neurons	8	0.80	0.00	0.00	0.00	0.00	0.00	0.00	0.01	▇▂▁▁▁
lr_probs	Astro_INSR+==>AGRP/NPY neurons	2	0.94	0.05	0.07	0.00	0.00	0.02	0.05	0.32	▇▁▁▁▁
lr_probs	Astro_INSR-==>AGRP/NPY neurons	1	0.98	0.04	0.06	0.00	0.00	0.02	0.05	0.31	▇▁▁▁▁
neg_log10_specificity_rank	Astro_INSR+==>AGRP/NPY neurons	2	0.94	3.43	1.27	1.31	2.59	3.18	4.24	6.56	▅▇▃▂▂
neg_log10_specificity_rank	Astro_INSR-==>AGRP/NPY neurons	1	0.98	3.39	1.34	1.24	2.25	3.31	4.23	6.56	▇▇▇▃▂

Code

# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code

pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code

plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

Lutomska 2022

Signalling pairs

How do insuline receptor positive or negative astrocytes coupled with POMC neurons?

Code

df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df

Code

skim(df)

Data summary
Name	df
Number of rows	143
Number of columns	6
_______________________
Column type frequency:
character	4
logical	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
pairs	1	26	26	2
lr_complex	1	8	20	90
ligand_complex	1	3	7	30
receptor_complex	1	3	13	58

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 143

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
lr_probs	0	1	0.02	0.05	0	0	0.01	0.02	0.31	▇▁▁▁▁

Code

df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>POMC neurons",
      "Astro_INSR-==>POMC neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>POMC neurons` - `Astro_INSR-==>POMC neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide

Code

skim(df_wide)

Data summary
Name	df_wide
Number of rows	90
Number of columns	8
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	20	90
ligand_complex	1	3	7	30
receptor_complex	1	3	13	58
diff_category	1	14	16	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 90

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p50	p75	p100	hist
Astro_INSR+==>POMC neurons	0	1.00	0.02	0.05	0.01	0.02	0.31	▇▁▁▁▁
Astro_INSR-==>POMC neurons	37	0.59	0.03	0.05	0.01	0.02	0.30	▇▁▁▁▁
diff_rank	37	0.59	0.01	0.01	0.00	0.01	0.03	▇▅▃▁▁

Code

# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>POMC neurons`, `Astro_INSR-==>POMC neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long

Code

skim(df_long)

Data summary
Name	df_long
Number of rows	180
Number of columns	9
_______________________
Column type frequency:
character	5
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	20	90
ligand_complex	1	3	7	30
receptor_complex	1	3	13	58
diff_category	1	14	16	2
pairs	1	26	26	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 180

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	74	0.59	0.01	0.01	0.00	0.00	0.00	0.01	0.03	▇▅▃▁▁
lr_probs	37	0.79	0.02	0.05	0.00	0.00	0.01	0.02	0.31	▇▁▁▁▁
neg_log10_specificity_rank	37	0.79	3.41	1.29	1.69	2.28	3.32	4.42	7.13	▇▃▆▂▁

Code

# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10

Code

skim(top_bot_10)

Data summary
Name	top_bot_10
Number of rows	68
Number of columns	9
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	pairs

Variable type: character

skim_variable	pairs	complete_rate	min	max	n_unique
lr_complex	Astro_INSR+==>POMC neurons	1	8	19	35
lr_complex	Astro_INSR-==>POMC neurons	1	8	19	33
ligand_complex	Astro_INSR+==>POMC neurons	1	3	6	16
ligand_complex	Astro_INSR-==>POMC neurons	1	3	6	14
receptor_complex	Astro_INSR+==>POMC neurons	1	3	11	27
receptor_complex	Astro_INSR-==>POMC neurons	1	3	11	25
diff_category	Astro_INSR+==>POMC neurons	1	14	16	2
diff_category	Astro_INSR-==>POMC neurons	1	14	16	2

Variable type: logical

skim_variable	pairs	n_missing	complete_rate	mean	count
control	Astro_INSR+==>POMC neurons	0	1	1	TRU: 35
control	Astro_INSR-==>POMC neurons	0	1	1	TRU: 33

Variable type: numeric

skim_variable	pairs	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	Astro_INSR+==>POMC neurons	1	0.97	0.01	0.01	0.00	0.00	0.00	0.01	0.03	▇▃▂▁▁
diff_rank	Astro_INSR-==>POMC neurons	0	1.00	0.01	0.01	0.00	0.00	0.00	0.01	0.03	▇▃▂▁▁
lr_probs	Astro_INSR+==>POMC neurons	0	1.00	0.05	0.07	0.00	0.00	0.02	0.06	0.31	▇▂▁▁▁
lr_probs	Astro_INSR-==>POMC neurons	0	1.00	0.04	0.07	0.00	0.00	0.01	0.05	0.30	▇▁▁▁▁
neg_log10_specificity_rank	Astro_INSR+==>POMC neurons	0	1.00	3.34	1.40	1.69	2.17	2.86	4.49	7.13	▇▃▃▂▁
neg_log10_specificity_rank	Astro_INSR-==>POMC neurons	0	1.00	3.02	1.09	1.78	2.28	2.93	3.54	5.91	▇▅▂▂▁

Code

# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code

pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>POMC neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>POMC neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code

plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>POMC neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>POMC neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>POMC neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

How do insuline receptor positive or negative astrocytes coupled with AGRP/NPY neurons?

Code

df <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    neg_log10_magnitude_rank = -log10(magnitude_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, lr_probs, control)

df

Code

skim(df)

Data summary
Name	df
Number of rows	117
Number of columns	6
_______________________
Column type frequency:
character	4
logical	1
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
pairs	1	30	30	2
lr_complex	1	8	20	75
ligand_complex	1	3	7	28
receptor_complex	1	3	13	49

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 117

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
lr_probs	0	1	0.02	0.04	0	0	0	0.02	0.26	▇▁▁▁▁

Code

df_specificity <- df_complex_conditions %>%
  filter(
    pairs %in% c(
      "Astro_INSR+==>AGRP/NPY neurons",
      "Astro_INSR-==>AGRP/NPY neurons"
    ),
    condition == "HFHSD",
    control == TRUE
  ) %>%
  mutate(
    neg_log10_specificity_rank = -log10(specificity_rank),
    lr_complex = str_c(ligand_complex, receptor_complex, sep = "–>")
  ) %>%
  select(pairs, lr_complex, ligand_complex, receptor_complex, neg_log10_specificity_rank, control)

# Reshape the dataframe to a wider format
df_wide <- df %>%
  pivot_wider(names_from = pairs, values_from = lr_probs)

# Calculate the difference and categorize it
df_wide <- df_wide %>%
  mutate(diff_rank = `Astro_INSR+==>AGRP/NPY neurons` - `Astro_INSR-==>AGRP/NPY neurons`) %>%
  mutate(diff_category = ifelse(abs(diff_rank) > 0.001 | is.na(abs(diff_rank)), "big difference", "small difference"))

df_wide

Code

skim(df_wide)

Data summary
Name	df_wide
Number of rows	75
Number of columns	8
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	20	75
ligand_complex	1	3	7	28
receptor_complex	1	3	13	49
diff_category	1	14	16	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 75

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p50	p75	p100	hist
Astro_INSR+==>AGRP/NPY neurons	0	1.00	0.02	0.04	0.00	0.02	0.26	▇▁▁▁▁
Astro_INSR-==>AGRP/NPY neurons	33	0.56	0.02	0.04	0.01	0.01	0.21	▇▁▁▁▁
diff_rank	33	0.56	0.00	0.00	0.00	0.01	0.02	▇▃▂▁▁

Code

# Reshape the dataframe back to a long format
df_long <- df_wide %>%
  pivot_longer(cols = c(`Astro_INSR+==>AGRP/NPY neurons`, `Astro_INSR-==>AGRP/NPY neurons`), names_to = "pairs", values_to = "lr_probs") %>%
  full_join(df_specificity, by = c("pairs", "lr_complex", "ligand_complex", "receptor_complex", "control"))

df_long

Code

skim(df_long)

Data summary
Name	df_long
Number of rows	150
Number of columns	9
_______________________
Column type frequency:
character	5
logical	1
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
lr_complex	1	8	20	75
ligand_complex	1	3	7	28
receptor_complex	1	3	13	49
diff_category	1	14	16	2
pairs	1	30	30	2

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
control	0	1	1	TRU: 150

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	66	0.56	0.00	0.00	0.00	0.00	0.0	0.01	0.02	▇▃▂▁▁
lr_probs	33	0.78	0.02	0.04	0.00	0.00	0.0	0.02	0.26	▇▁▁▁▁
neg_log10_specificity_rank	33	0.78	3.18	1.21	1.72	2.15	3.2	3.65	7.13	▇▇▂▁▁

Code

# Prepare the top 20 data
top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 40) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = lr_probs, n = 10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "big difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = diff_rank, n = 15) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    !ligand_complex %in% df_long$receptor_complex,
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 60) %>%
  ungroup() %>%
  group_by(pairs) %>%
  slice_max(order_by = neg_log10_specificity_rank, n = 20) %>%
  bind_rows(top_bot_10)

top_bot_10 <- df_long %>%
  filter(
    diff_category == "small difference"
  ) %>%
  group_by(pairs) %>%
  distinct(ligand_complex, diff_category, lr_probs, pairs, .keep_all = TRUE) %>%
  slice_max(order_by = lr_probs, n = 10) %>%
  bind_rows(top_bot_10) %>%
  distinct(lr_complex, diff_category, pairs, .keep_all = TRUE) %>%
  filter(!ligand_complex %in% c("Pomc", "Agrp", "Npy"))

top_bot_10

Code

skim(top_bot_10)

Data summary
Name	top_bot_10
Number of rows	58
Number of columns	9
_______________________
Column type frequency:
character	4
logical	1
numeric	3
________________________
Group variables	pairs

Variable type: character

skim_variable	pairs	complete_rate	min	max	n_unique
lr_complex	Astro_INSR+==>AGRP/NPY neurons	1	8	19	31
lr_complex	Astro_INSR-==>AGRP/NPY neurons	1	8	19	27
ligand_complex	Astro_INSR+==>AGRP/NPY neurons	1	3	6	16
ligand_complex	Astro_INSR-==>AGRP/NPY neurons	1	3	6	15
receptor_complex	Astro_INSR+==>AGRP/NPY neurons	1	3	11	19
receptor_complex	Astro_INSR-==>AGRP/NPY neurons	1	3	11	17
diff_category	Astro_INSR+==>AGRP/NPY neurons	1	14	16	2
diff_category	Astro_INSR-==>AGRP/NPY neurons	1	14	16	2

Variable type: logical

skim_variable	pairs	n_missing	complete_rate	mean	count
control	Astro_INSR+==>AGRP/NPY neurons	0	1	1	TRU: 31
control	Astro_INSR-==>AGRP/NPY neurons	0	1	1	TRU: 27

Variable type: numeric

skim_variable	pairs	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
diff_rank	Astro_INSR+==>AGRP/NPY neurons	4	0.87	0.01	0.01	0.00	0.00	0.00	0.01	0.02	▇▆▅▁▁
diff_rank	Astro_INSR-==>AGRP/NPY neurons	0	1.00	0.01	0.01	0.00	0.00	0.00	0.01	0.02	▇▆▅▁▁
lr_probs	Astro_INSR+==>AGRP/NPY neurons	0	1.00	0.04	0.06	0.00	0.01	0.02	0.05	0.26	▇▂▁▁▁
lr_probs	Astro_INSR-==>AGRP/NPY neurons	0	1.00	0.03	0.04	0.00	0.00	0.01	0.03	0.21	▇▂▁▁▁
neg_log10_specificity_rank	Astro_INSR+==>AGRP/NPY neurons	0	1.00	3.23	1.37	1.72	2.02	3.20	3.78	7.13	▇▇▃▁▁
neg_log10_specificity_rank	Astro_INSR-==>AGRP/NPY neurons	0	1.00	3.05	1.12	1.84	2.27	3.04	3.26	6.87	▇▇▁▁▁

Code

# Generate the plot
pr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pr <- pr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = receptor_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pr)

Code

pl <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
pl <- pl + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR+==>AGRP/NPY neurons",
      diff_rank >= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      pairs == "Astro_INSR-==>AGRP/NPY neurons",
      diff_rank <= 0
    ),
  aes(
    label = ligand_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(pl)

Code

plr <- ggplot(df_long, aes(x = pairs, y = lr_probs, color = as.factor(diff_category), size = neg_log10_specificity_rank)) +
  geom_jitter(width = 0.05) +
  geom_line(aes(group = lr_complex), linetype = "solid", alpha = 0.2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_wrap(~diff_category)

# Add text labels for min and max 5 values within each group using ggrepel
plr <- plr + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
        diff_rank >= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR+==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = 0.2, direction = "y", hjust = "left",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) + geom_text_repel(
  data = top_bot_10 %>%
    filter(
      (pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
        diff_rank <= 0) |
        (!is.na(lr_probs) &
           pairs == "Astro_INSR-==>AGRP/NPY neurons" & 
           is.na(diff_rank))
    ),
  aes(
    label = lr_complex,
    point.size = neg_log10_specificity_rank
  ), # data point size
  nudge_x = -.2, direction = "y", hjust = "right",
  size = 6, # font size in the text labels
  point.padding = 0.2, # additional padding around each point
  min.segment.length = 0, # draw all line segments
  max.time = 5, max.iter = 1e6, # stop after 1 second, or after 100,000 iterations
  seed = reseed,
  max.overlaps = Inf,
  box.padding = 0.5
) +
  scale_x_discrete(expand = expansion(add = 2))

print(plr)

Session info

Code

sessioninfo::session_info()

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.3 (2023-03-15)
 os       macOS 14.0
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Vienna
 date     2023-07-31
 pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package          * version    date (UTC) lib source
 base64enc          0.1-3      2015-07-28 [1] CRAN (R 4.2.0)
 bayestestR         0.13.1     2023-04-07 [1] RSPM (R 4.2.3)
 bit                4.0.5      2022-11-15 [1] CRAN (R 4.2.0)
 bit64              4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
 boot               1.3-28.1   2022-11-22 [1] CRAN (R 4.2.3)
 cli                3.6.1      2023-03-23 [1] RSPM (R 4.2.3)
 coda               0.19-4     2020-09-30 [1] CRAN (R 4.2.0)
 codetools          0.2-19     2023-02-01 [1] CRAN (R 4.2.3)
 colorspace         2.1-0      2023-01-23 [1] CRAN (R 4.2.0)
 ComplexUpset     * 1.3.5      2022-11-28 [1] Github (krassowski/complex-upset@bb3f10a)
 correlation        0.8.4      2023-04-06 [1] RSPM (R 4.2.3)
 cowplot          * 1.1.1      2020-12-30 [1] CRAN (R 4.2.0)
 crayon             1.5.2      2022-09-29 [1] CRAN (R 4.2.0)
 dabestr          * 0.3.0      2022-05-21 [1] Github (ACCLAB/dabestr@8775899)
 datawizard         0.7.1      2023-04-03 [1] RSPM (R 4.2.3)
 digest             0.6.31     2022-12-11 [1] CRAN (R 4.2.0)
 dplyr            * 1.1.2      2023-04-20 [1] RSPM (R 4.2.3)
 emmeans            1.8.5      2023-03-08 [1] RSPM (R 4.2.3)
 estimability       1.4.1      2022-08-05 [1] CRAN (R 4.2.0)
 evaluate           0.21       2023-05-05 [1] RSPM (R 4.2.3)
 fansi              1.0.4      2023-01-22 [1] CRAN (R 4.2.0)
 farver             2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
 fastmap            1.1.1      2023-02-24 [1] RSPM (R 4.2.3)
 forcats          * 1.0.0      2023-01-29 [1] CRAN (R 4.2.0)
 future           * 1.33.0     2023-07-01 [1] RSPM (R 4.2.3)
 generics           0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
 ggmin              0.0.0.9000 2022-08-12 [1] Github (sjessa/ggmin@8ada274)
 ggplot2          * 3.4.2      2023-04-03 [1] RSPM (R 4.2.3)
 ggrepel          * 0.9.3      2023-02-03 [1] RSPM (R 4.2.3)
 ggstatsplot      * 0.11.1     2023-04-14 [1] RSPM (R 4.2.3)
 ggupset          * 0.3.0.9002 2022-05-21 [1] Github (const-ae/ggupset@2a03c58)
 globals            0.16.2     2022-11-21 [1] CRAN (R 4.2.0)
 glue               1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
 gtable             0.3.3      2023-03-21 [1] RSPM (R 4.2.3)
 here             * 1.0.1      2020-12-13 [1] CRAN (R 4.2.0)
 hms                1.1.3      2023-03-21 [1] RSPM (R 4.2.3)
 htmltools          0.5.5      2023-03-23 [1] RSPM (R 4.2.3)
 htmlwidgets        1.6.2      2023-03-17 [1] RSPM (R 4.2.3)
 insight            0.19.1     2023-03-18 [1] RSPM (R 4.2.3)
 jsonlite           1.8.4      2022-12-06 [1] CRAN (R 4.2.0)
 knitr              1.42       2023-01-25 [1] RSPM
 labeling           0.4.2      2020-10-20 [1] CRAN (R 4.2.0)
 lattice            0.21-8     2023-04-05 [1] RSPM (R 4.2.3)
 lifecycle          1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
 listenv            0.9.0      2022-12-16 [1] CRAN (R 4.2.0)
 lubridate        * 1.9.2      2023-02-10 [1] RSPM (R 4.2.3)
 magrittr         * 2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
 MASS               7.3-58.3   2023-03-07 [1] RSPM (R 4.2.3)
 Matrix             1.5-4      2023-04-04 [1] RSPM (R 4.2.3)
 multcomp           1.4-23     2023-03-09 [1] RSPM (R 4.2.3)
 munsell            0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
 mvtnorm            1.1-3      2021-10-08 [1] CRAN (R 4.2.0)
 paletteer          1.5.0      2022-10-19 [1] CRAN (R 4.2.0)
 parallelly         1.36.0     2023-05-26 [1] RSPM (R 4.2.3)
 parameters         0.21.0     2023-04-19 [1] RSPM (R 4.2.3)
 patchwork        * 1.1.2.9000 2023-02-08 [1] Github (thomasp85/patchwork@c14c960)
 pillar             1.9.0      2023-03-22 [1] RSPM (R 4.2.3)
 pkgconfig          2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
 png                0.1-8      2022-11-29 [1] CRAN (R 4.2.0)
 purrr            * 1.0.1      2023-01-10 [1] CRAN (R 4.2.0)
 R6                 2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
 RColorBrewer     * 1.1-3      2022-04-03 [1] CRAN (R 4.2.0)
 Rcpp               1.0.10     2023-01-22 [1] CRAN (R 4.2.0)
 readr            * 2.1.4      2023-02-10 [1] RSPM (R 4.2.3)
 rematch2           2.1.2      2020-05-01 [1] CRAN (R 4.2.0)
 repr               1.1.6      2023-01-26 [1] CRAN (R 4.2.0)
 reticulate       * 1.30-9000  2023-07-30 [1] Github (rstudio/reticulate@9a1ed79)
 rlang              1.1.0      2023-03-14 [1] RSPM (R 4.2.3)
 rmarkdown          2.21       2023-03-26 [1] RSPM (R 4.2.3)
 rprojroot          2.0.3      2022-04-02 [1] CRAN (R 4.2.0)
 rstudioapi         0.14       2022-08-22 [1] CRAN (R 4.2.0)
 sandwich           3.0-2      2022-06-15 [1] CRAN (R 4.2.0)
 scales             1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
 sessioninfo        1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
 skimr            * 2.1.5      2023-02-08 [1] Github (ropensci/skimr@d5126aa)
 statsExpressions   1.5.0      2023-02-19 [1] RSPM (R 4.2.3)
 stringi            1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
 stringr          * 1.5.0      2022-12-02 [1] CRAN (R 4.2.0)
 survival           3.5-5      2023-03-12 [1] RSPM (R 4.2.3)
 TH.data            1.1-2      2023-04-17 [1] RSPM (R 4.2.3)
 tibble           * 3.2.1      2023-03-20 [1] RSPM (R 4.2.3)
 tidyr            * 1.3.0      2023-01-24 [1] CRAN (R 4.2.0)
 tidyselect         1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
 tidyverse        * 2.0.0      2023-02-22 [1] RSPM (R 4.2.3)
 timechange         0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
 tzdb               0.3.0      2022-03-28 [1] CRAN (R 4.2.0)
 utf8               1.2.3      2023-01-31 [1] CRAN (R 4.2.0)
 vctrs              0.6.2      2023-04-19 [1] RSPM (R 4.2.3)
 vroom              1.6.1      2023-01-22 [1] CRAN (R 4.2.0)
 withr              2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
 xfun               0.39       2023-04-20 [1] RSPM (R 4.2.3)
 xtable             1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
 yaml               2.3.7      2023-01-23 [1] RSPM
 zeallot            0.1.0      2018-01-28 [1] CRAN (R 4.2.0)
 zoo                1.8-12     2023-04-13 [1] RSPM (R 4.2.3)

 [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library

──────────────────────────────────────────────────────────────────────────────