diff --git a/analysis_benchmark_lbm_seq.html b/analysis_benchmark_lbm_seq.html
new file mode 100644
index 0000000..1d9d5b2
--- /dev/null
+++ b/analysis_benchmark_lbm_seq.html
@@ -0,0 +1,3993 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
here() starts at /home/louis/Documents/Thèse/Axe 3 - Inférence et Microbiote/human-microbiome-compendium
+
+
library (stringr)
+library (tidyverse)
+
+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+✔ dplyr 1.1.4 ✔ purrr 1.0.4
+✔ forcats 1.0.1 ✔ readr 2.1.6
+✔ ggplot2 4.0.1 ✔ tibble 3.3.1
+✔ lubridate 1.9.4 ✔ tidyr 1.3.2
+
+
+
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag() masks stats::lag()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+
library (phyloseq)
+library (biomformat)
+source ("utils.R" )
+
+
+Attachement du package : 'kableExtra'
+
+L'objet suivant est masqué depuis 'package:dplyr':
+
+ group_rows
+
+
+Attachement du package : 'rlang'
+
+Les objets suivants sont masqués depuis 'package:purrr':
+
+ %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
+ flatten_raw, invoke, splice
+
+
the_data <- import_biom ("data/mach/kinetic.biom" )
+ per_taxa_networks <- collapse_otu_at_taxo (the_data)
+ otu_df <- sapply (per_taxa_networks, nrow) %>%
+ data.frame () %>%
+ rownames_to_column () %>%
+ rename (Nb_OTU = "." , Rank = "rowname" )
+
+
+ flist <- list.files (here ("results" , "lbm-seq" ), full.names = TRUE , pattern = ".Rds" )
+
+ para_flist <- grepv (pattern = "para.Rds" , flist)
+ seq_flist <- grepv (pattern = "seq.Rds" , flist)
+ notrans_flist <- grepv (pattern = "notrans.Rds" , flist)
+
+
+
bench_df <- do.call ("rbind" , lapply (flist, function (file) readRDS (file)$ benchmark))
+
+ bench_df <- bench_df %>%
+ mutate (expr = as.character (expr)) %>%
+ separate_wider_regex (cols = "expr" , patterns = c (Rank = "Rank[0-9]" , type = "para|seq|notrans" )) %>%
+ mutate (Rank = as.factor (Rank), type = as.factor (type)) %>%
+ left_join (otu_df, by = "Rank" ) %>%
+ mutate (Rank = as.factor (Rank), type = as.factor (type))
+
+levels (bench_df$ Rank) <- c ("Phylum" , "Class" , "Order" , "Family" , "Genus" )
+
+
+
library (ggplot2)
+
+ coeff <- 220000000000
+
+ggplot (bench_df, aes (x = Rank, col = type)) +
+ geom_boxplot (aes (y = time)) +
+ geom_point (aes (y = coeff * Nb_OTU, size = Nb_OTU), shape = 13 ) +
+ scale_color_manual (values = c ("#363634" , "#009E73" , "#CC79A7" ), labels = c ("No transfer" , "Parallelized" , "Sequential" )) +
+ scale_y_continuous (sec.axis = sec_axis (~ . / coeff, name = "Number of OTUs" )) +
+ labs (size = "Number of OTUs" , color = "Algorithm Type" , y = "Time (seconds)" ) +
+ theme_minimal ()
+
+
+
+
+
+
+
+
+
+
load_result_list <- function (flist) {
+ results <- lapply (flist, function (file) readRDS (file)$ models)
+ names (results) <- paste0 ("Rep" , seq_along (results))
+ results <- purrr:: transpose (results)
+ return (results)
+ }
+
+library (aricode)
+ ARI_in_repet_Z1 <- function (result_list) {
+ lapply (result_list, function (rank) {
+ Z1_list <- lapply (rank, function (repet) {
+ Z11 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z1, 1 , which.max)
+ })
+ outer (X = Z1_list, Y = Z1_list, Vectorize (ARI))
+ })
+ }
+
+ ARI_in_repet_Z2 <- function (result_list) {
+ lapply (result_list, function (rank) {
+ Z2_list <- lapply (rank, function (repet) {
+ Z2 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z2, 1 , which.max)
+ })
+ outer (X = Z2_list, Y = Z2_list, Vectorize (ARI))
+ })
+ }
+
+
+
seq_results <- load_result_list (seq_flist)
+ para_results <- load_result_list (para_flist)
+ notrans_results <- load_result_list (notrans_flist)
+
+
+
ARI_in_repet_Z1 (seq_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+
ARI_in_repet_Z1 (para_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+
ARI_in_repet_Z1 (notrans_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+
+
+
ARI_in_repet_Z2 (seq_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3 Rep4
+Rep1 1 1 1 1
+Rep2 1 1 1 1
+Rep3 1 1 1 1
+Rep4 1 1 1 1
+
+
ARI_in_repet_Z2 (para_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+
ARI_in_repet_Z2 (notrans_results)
+
+
$Rank2
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank3
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank4
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+$Rank5
+ Rep1 Rep2 Rep3
+Rep1 1 1 1
+Rep2 1 1 1
+Rep3 1 1 1
+
+
+Tous les Z sont concordants entre les répétitions, je vais donc sélectionner une seule répétition de chaque pour l’analyse.
+
+Analyse des résultats des LBM
+Ci-après on extrait les premiers modèles.
+
+
seq_model <- purrr:: transpose (seq_results)[[2 ]]
+ para_model <- purrr:: transpose (para_results)[[1 ]]
+ notrans_model <- purrr:: transpose (notrans_results)[[1 ]]
+
+
+
extract_memberships <- function (model) {
+ memberships <- model$ memberships[[which.max (model$ ICL)]]
+ rownames (memberships[["Z1" ]]) <- rownames (model$ adj)
+ rownames (memberships[["Z2" ]]) <- colnames (model$ adj)
+ memberships
+ map_memberships <- list (Z1 = apply (memberships[["Z1" ]], 1 , which.max), Z2 = apply (memberships[["Z2" ]], 1 , which.max))
+ map_memberships
+ }
+
+
+Comparaison des ARI par rang taxonomique
+
+
seq_memberships <- lapply (seq_model, extract_memberships)
+ para_memberships <- lapply (para_model, extract_memberships)
+ notrans_memberships <- lapply (notrans_model, extract_memberships)
+
+
+
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (seq_memberships)[["Z1" ]], .f = ARI)) %>% t () %>%
+ knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for OTU memberships" )
+
+
+ARI for the methods for OTU memberships
+
+
+
+
+
+Para v No transfer
+1
+1
+1
+1
+
+
+Sequential v No transfer
+1
+1
+1
+1
+
+
+Para v Sequential
+1
+1
+1
+1
+
+
+
+
+
+
+
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (seq_memberships)[["Z2" ]], .f = ARI)) %>% t () %>%
+ knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for sample memberships" )
+
+
+ARI for the methods for sample memberships
+
+
+
+
+
+Para v No transfer
+1
+1
+0.9532922
+1
+
+
+Sequential v No transfer
+1
+1
+0.9532922
+1
+
+
+Para v Sequential
+1
+1
+1.0000000
+1
+
+
+
+
+
+
+
library (ggalluvial)
+
+
+ membership_to_df <- function (membership_list, suffix = 1 ) {
+ lapply (membership_list, function (membership_vec) {
+ df <- data.frame (membership_vec)
+ colnames (df) <- paste0 ("Z" , suffix)
+ df %>%
+ rownames_to_column (var = "Rank" ) %>%
+ separate_wider_delim (cols = "Rank" , delim = ";_;" , names_sep = "" )
+ })
+ }
+
+ seq_df <- lapply (seq_along (seq_memberships), function (idx) {
+ membership_to_df (seq_memberships[[idx]], suffix = idx + 1 )
+ })
+
+ seq_df_otu <- lapply (seq_df, function (df) {
+ df$ Z1
+ }) %>% # Below we join by the ranks
+ reduce (left_join) %>%
+ select (sort (names (.)))
+
+
Joining with `by = join_by(Rank1, Rank2)`
+Joining with `by = join_by(Rank1, Rank2, Rank3)`
+Joining with `by = join_by(Rank1, Rank2, Rank3, Rank4)`
+
+
seq_df_sample <- lapply (seq_df, function (df) {
+ df$ Z2
+ }) %>% reduce (left_join, by = "Rank1" )
+
+
+ seq_df_sample_lodes <- to_lodes_form (seq_df_sample, key = "x" , axes = 2 : 5 )
+ seq_df_otu_lodes <- to_lodes_form (seq_df_otu, key = "x" , axes = 6 : 9 )
+
+
+
mach_metadata <- read.table (file = "data/mach/kinetic_sample_metadata.tsv" ) %>% rownames_to_column (var = "Rank1" )
+
+ seq_df_sample_lodes <- left_join (seq_df_sample_lodes, mach_metadata)
+
+
Joining with `by = join_by(Rank1)`
+
+
ggplot (seq_df_sample_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+ geom_alluvium (aes (fill = Weaned)) +
+ geom_stratum () +
+ geom_text (stat = "stratum" )
+
+
+
+
+
+
+
+
ggplot (seq_df_otu_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+ geom_alluvium (aes (fill = Rank2)) +
+ geom_stratum () +
+ geom_text (stat = "stratum" )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/analysis_benchmark_lbm_seq.qmd b/analysis_benchmark_lbm_seq.qmd
index 7a045fd..27d8f51 100644
--- a/analysis_benchmark_lbm_seq.qmd
+++ b/analysis_benchmark_lbm_seq.qmd
@@ -1,6 +1,7 @@
---
title: "Stats at Taxo"
format: html
+embed-resources: true
---
```{r}