From 27bfabed062bb92394030e928ffbe3d182f83505 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 2 Feb 2026 09:27:53 +0100 Subject: [PATCH] embedding resources --- analysis_benchmark_lbm_seq.html | 3993 +++++++++++++++++++++++++++++++ analysis_benchmark_lbm_seq.qmd | 1 + 2 files changed, 3994 insertions(+) create mode 100644 analysis_benchmark_lbm_seq.html diff --git a/analysis_benchmark_lbm_seq.html b/analysis_benchmark_lbm_seq.html new file mode 100644 index 0000000..1d9d5b2 --- /dev/null +++ b/analysis_benchmark_lbm_seq.html @@ -0,0 +1,3993 @@ + + + + + + + + + +Stats at Taxo + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

Stats at Taxo

+
+ + + +
+ + + + +
+ + + +
+ + +
+
library(here)
+
+
here() starts at /home/louis/Documents/Thèse/Axe 3 - Inférence et Microbiote/human-microbiome-compendium
+
+
library(stringr)
+library(tidyverse)
+
+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+✔ dplyr     1.1.4     ✔ purrr     1.0.4
+✔ forcats   1.0.1     ✔ readr     2.1.6
+✔ ggplot2   4.0.1     ✔ tibble    3.3.1
+✔ lubridate 1.9.4     ✔ tidyr     1.3.2
+
+
+
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag()    masks stats::lag()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+
library(phyloseq)
+library(biomformat)
+source("utils.R")
+
+

+Attachement du package : 'kableExtra'
+
+L'objet suivant est masqué depuis 'package:dplyr':
+
+    group_rows
+
+
+Attachement du package : 'rlang'
+
+Les objets suivants sont masqués depuis 'package:purrr':
+
+    %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
+    flatten_raw, invoke, splice
+
+
the_data <- import_biom("data/mach/kinetic.biom")
+per_taxa_networks <- collapse_otu_at_taxo(the_data)
+otu_df <- sapply(per_taxa_networks, nrow) %>%
+    data.frame() %>%
+    rownames_to_column() %>%
+    rename(Nb_OTU = ".", Rank = "rowname")
+
+
+flist <- list.files(here("results", "lbm-seq"), full.names = TRUE, pattern = ".Rds")
+
+para_flist <- grepv(pattern = "para.Rds", flist)
+seq_flist <- grepv(pattern = "seq.Rds", flist)
+notrans_flist <- grepv(pattern = "notrans.Rds", flist)
+
+
+
bench_df <- do.call("rbind", lapply(flist, function(file) readRDS(file)$benchmark))
+
+bench_df <- bench_df %>%
+    mutate(expr = as.character(expr)) %>%
+    separate_wider_regex(cols = "expr", patterns = c(Rank = "Rank[0-9]", type = "para|seq|notrans")) %>%
+    mutate(Rank = as.factor(Rank), type = as.factor(type)) %>%
+    left_join(otu_df, by = "Rank") %>%
+    mutate(Rank = as.factor(Rank), type = as.factor(type))
+
+levels(bench_df$Rank) <- c("Phylum", "Class", "Order", "Family", "Genus")
+
+
+
library(ggplot2)
+
+coeff <- 220000000000
+
+ggplot(bench_df, aes(x = Rank, col = type)) +
+    geom_boxplot(aes(y = time)) +
+    geom_point(aes(y = coeff * Nb_OTU, size = Nb_OTU), shape = 13) +
+    scale_color_manual(values = c("#363634", "#009E73", "#CC79A7"), labels = c("No transfer", "Parallelized", "Sequential")) +
+    scale_y_continuous(sec.axis = sec_axis(~ . / coeff, name = "Number of OTUs")) +
+    labs(size = "Number of OTUs", color = "Algorithm Type", y = "Time (seconds)") +
+    theme_minimal()
+
+
+
+

+
+
+
+
+
+
load_result_list <- function(flist) {
+    results <- lapply(flist, function(file) readRDS(file)$models)
+    names(results) <- paste0("Rep", seq_along(results))
+    results <- purrr::transpose(results)
+    return(results)
+}
+
+library(aricode)
+ARI_in_repet_Z1 <- function(result_list) {
+    lapply(result_list, function(rank) {
+        Z1_list <- lapply(rank, function(repet) {
+            Z11 <- apply(repet$memberships[[which.max(repet$ICL)]]$Z1, 1, which.max)
+        })
+        outer(X = Z1_list, Y = Z1_list, Vectorize(ARI))
+    })
+}
+
+ARI_in_repet_Z2 <- function(result_list) {
+    lapply(result_list, function(rank) {
+        Z2_list <- lapply(rank, function(repet) {
+            Z2 <- apply(repet$memberships[[which.max(repet$ICL)]]$Z2, 1, which.max)
+        })
+        outer(X = Z2_list, Y = Z2_list, Vectorize(ARI))
+    })
+}
+
+
+
seq_results <- load_result_list(seq_flist)
+para_results <- load_result_list(para_flist)
+notrans_results <- load_result_list(notrans_flist)
+
+
+
ARI_in_repet_Z1(seq_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+
ARI_in_repet_Z1(para_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+
ARI_in_repet_Z1(notrans_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+
+
+
ARI_in_repet_Z2(seq_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3 Rep4
+Rep1    1    1    1    1
+Rep2    1    1    1    1
+Rep3    1    1    1    1
+Rep4    1    1    1    1
+
+
ARI_in_repet_Z2(para_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+
ARI_in_repet_Z2(notrans_results)
+
+
$Rank2
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank3
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank4
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+$Rank5
+     Rep1 Rep2 Rep3
+Rep1    1    1    1
+Rep2    1    1    1
+Rep3    1    1    1
+
+
+

Tous les Z sont concordants entre les répétitions, je vais donc sélectionner une seule répétition de chaque pour l’analyse.

+
+

Analyse des résultats des LBM

+

Ci-après on extrait les premiers modèles.

+
+
seq_model <- purrr::transpose(seq_results)[[2]]
+para_model <- purrr::transpose(para_results)[[1]]
+notrans_model <- purrr::transpose(notrans_results)[[1]]
+
+
+
extract_memberships <- function(model) {
+    memberships <- model$memberships[[which.max(model$ICL)]]
+    rownames(memberships[["Z1"]]) <- rownames(model$adj)
+    rownames(memberships[["Z2"]]) <- colnames(model$adj)
+    memberships
+    map_memberships <- list(Z1 = apply(memberships[["Z1"]], 1, which.max), Z2 = apply(memberships[["Z2"]], 1, which.max)) 
+    map_memberships
+}
+
+
+

Comparaison des ARI par rang taxonomique

+
+
seq_memberships <- lapply(seq_model, extract_memberships)
+para_memberships <- lapply(para_model, extract_memberships)
+notrans_memberships <- lapply(notrans_model, extract_memberships)
+
+
+
tibble("Para v No transfer" = map2_dbl(purrr::transpose(para_memberships)[["Z1"]], purrr::transpose(notrans_memberships)[["Z1"]], .f = ARI), "Sequential v No transfer" = map2_dbl(purrr::transpose(seq_memberships)[["Z1"]], purrr::transpose(notrans_memberships)[["Z1"]], .f = ARI), "Para v Sequential" = map2_dbl(purrr::transpose(para_memberships)[["Z1"]], purrr::transpose(seq_memberships)[["Z1"]], .f = ARI)) %>% t() %>%
+knitr::kable(col.names = paste0("Rank", seq(2,5)), caption = "ARI for the methods for OTU memberships")
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ARI for the methods for OTU memberships
Rank2Rank3Rank4Rank5
Para v No transfer1111
Sequential v No transfer1111
Para v Sequential1111
+
+
+
+
tibble("Para v No transfer" = map2_dbl(purrr::transpose(para_memberships)[["Z2"]], purrr::transpose(notrans_memberships)[["Z2"]], .f = ARI), "Sequential v No transfer" = map2_dbl(purrr::transpose(seq_memberships)[["Z2"]], purrr::transpose(notrans_memberships)[["Z2"]], .f = ARI), "Para v Sequential" = map2_dbl(purrr::transpose(para_memberships)[["Z2"]], purrr::transpose(seq_memberships)[["Z2"]], .f = ARI)) %>% t() %>%
+knitr::kable(col.names = paste0("Rank", seq(2,5)), caption = "ARI for the methods for sample memberships")
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ARI for the methods for sample memberships
Rank2Rank3Rank4Rank5
Para v No transfer110.95329221
Sequential v No transfer110.95329221
Para v Sequential111.00000001
+
+
+
+
library(ggalluvial)
+
+
+membership_to_df <- function(membership_list, suffix = 1) {
+    lapply(membership_list, function(membership_vec) {
+        df <- data.frame(membership_vec)
+        colnames(df) <- paste0("Z", suffix)
+        df %>%
+            rownames_to_column(var = "Rank") %>%
+            separate_wider_delim(cols = "Rank", delim = ";_;", names_sep = "")
+    })
+}
+
+seq_df <- lapply(seq_along(seq_memberships), function(idx) {
+    membership_to_df(seq_memberships[[idx]], suffix = idx + 1)
+})
+
+seq_df_otu <- lapply(seq_df, function(df) {
+    df$Z1
+}) %>% # Below we join by the ranks
+    reduce(left_join) %>%
+    select(sort(names(.)))
+
+
Joining with `by = join_by(Rank1, Rank2)`
+Joining with `by = join_by(Rank1, Rank2, Rank3)`
+Joining with `by = join_by(Rank1, Rank2, Rank3, Rank4)`
+
+
seq_df_sample <- lapply(seq_df, function(df) {
+    df$Z2
+}) %>% reduce(left_join, by = "Rank1")
+
+
+seq_df_sample_lodes <- to_lodes_form(seq_df_sample, key = "x", axes = 2:5)
+seq_df_otu_lodes <- to_lodes_form(seq_df_otu, key = "x", axes = 6:9)
+
+
+
mach_metadata <- read.table(file = "data/mach/kinetic_sample_metadata.tsv") %>% rownames_to_column(var = "Rank1")
+
+seq_df_sample_lodes <- left_join(seq_df_sample_lodes, mach_metadata)
+
+
Joining with `by = join_by(Rank1)`
+
+
ggplot(seq_df_sample_lodes, aes(x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+    geom_alluvium(aes(fill = Weaned)) +
+    geom_stratum() +
+    geom_text(stat = "stratum")
+
+
+
+

+
+
+
+
ggplot(seq_df_otu_lodes, aes(x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+    geom_alluvium(aes(fill = Rank2)) +
+    geom_stratum() +
+    geom_text(stat = "stratum")
+
+
+
+

+
+
+
+
+
+
+ +
+ + +
+ + + + + \ No newline at end of file diff --git a/analysis_benchmark_lbm_seq.qmd b/analysis_benchmark_lbm_seq.qmd index 7a045fd..27d8f51 100644 --- a/analysis_benchmark_lbm_seq.qmd +++ b/analysis_benchmark_lbm_seq.qmd @@ -1,6 +1,7 @@ --- title: "Stats at Taxo" format: html +embed-resources: true --- ```{r}