diff --git a/analysis_benchmark_lbm_seq.html b/analysis_benchmark_lbm_seq.html
new file mode 100644
index 0000000..7e94d44
--- /dev/null
+++ b/analysis_benchmark_lbm_seq.html
@@ -0,0 +1,3938 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
here() starts at /home/louis/Documents/Thèse/Axe 3 - Inférence et Microbiote/human-microbiome-compendium
+
+
library (stringr)
+library (tidyverse)
+
+
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+✔ dplyr 1.1.4 ✔ purrr 1.0.4
+✔ forcats 1.0.1 ✔ readr 2.1.6
+✔ ggplot2 4.0.1 ✔ tibble 3.3.1
+✔ lubridate 1.9.4 ✔ tidyr 1.3.2
+
+
+
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag() masks stats::lag()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+
+
library (phyloseq)
+library (biomformat)
+source ("utils.R" )
+
+
+Attachement du package : 'kableExtra'
+
+L'objet suivant est masqué depuis 'package:dplyr':
+
+ group_rows
+
+
+Attachement du package : 'rlang'
+
+Les objets suivants sont masqués depuis 'package:purrr':
+
+ %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
+ flatten_raw, invoke, splice
+
+
the_data <- import_biom ("data/chaillou/chaillou.biom" )
+ per_taxa_networks <- collapse_otu_at_taxo (the_data)
+ otu_df <- sapply (per_taxa_networks, nrow) %>%
+ data.frame () %>%
+ rownames_to_column () %>%
+ rename (Nb_OTU = "." , Rank = "rowname" )
+
+
+ flist <- list.files (here ("results" , "lbm-seq" , "chaillou" ), full.names = TRUE , pattern = ".Rds" )
+
+ para_flist <- grepv (pattern = "para.Rds" , flist)
+ seq_flist <- grepv (pattern = "seq.Rds" , flist)
+ notrans_flist <- grepv (pattern = "notrans.Rds" , flist)
+
+
+
bench_df <- do.call ("rbind" , lapply (flist, function (file) readRDS (file)$ benchmark))
+
+ bench_df <- bench_df %>%
+ mutate (expr = as.character (expr)) %>%
+ separate_wider_regex (cols = "expr" , patterns = c (Rank = "Rank[0-9]" , type = "para|seq|notrans" )) %>%
+ mutate (Rank = as.factor (Rank), type = as.factor (type)) %>%
+ left_join (otu_df, by = "Rank" ) %>%
+ mutate (Rank = as.factor (Rank), type = as.factor (type))
+
+levels (bench_df$ Rank) <- c ("Phylum" , "Class" , "Order" , "Family" , "Genus" )
+
+
+
library (ggplot2)
+
+ coeff <- 220000000000
+
+ggplot (bench_df, aes (x = Rank, col = type)) +
+ geom_boxplot (aes (y = time)) +
+ geom_point (aes (y = coeff * Nb_OTU, size = Nb_OTU), shape = 13 ) +
+ scale_color_manual (values = c ("#363634" , "#009E73" , "#CC79A7" ), labels = c ("No transfer" , "Parallelized" , "Sequential" )) +
+ scale_y_continuous (sec.axis = sec_axis (~ . / coeff, name = "Number of OTUs" )) +
+ labs (size = "Number of OTUs" , color = "Algorithm Type" , y = "Time (seconds)" ) +
+ theme_minimal ()
+
+
+
+
+
+
+
+
+
+
load_result_list <- function (flist) {
+ results <- lapply (flist, function (file) readRDS (file)$ models)
+ names (results) <- paste0 ("Rep" , seq_along (results))
+ results <- purrr:: transpose (results)
+ return (results)
+ }
+
+library (aricode)
+ ARI_in_repet_Z1 <- function (result_list) {
+ lapply (result_list, function (rank) {
+ Z1_list <- lapply (rank, function (repet) {
+ Z11 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z1, 1 , which.max)
+ })
+ outer (X = Z1_list, Y = Z1_list, Vectorize (ARI))
+ })
+ }
+
+ ARI_in_repet_Z2 <- function (result_list) {
+ lapply (result_list, function (rank) {
+ Z2_list <- lapply (rank, function (repet) {
+ Z2 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z2, 1 , which.max)
+ })
+ outer (X = Z2_list, Y = Z2_list, Vectorize (ARI))
+ })
+ }
+
+
+
seq_results <- load_result_list (seq_flist)
+ para_results <- load_result_list (para_flist)
+ notrans_results <- load_result_list (notrans_flist)
+
+
+
ARI_in_repet_Z1 (seq_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
ARI_in_repet_Z1 (para_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
ARI_in_repet_Z1 (notrans_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
+
+
ARI_in_repet_Z2 (seq_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
ARI_in_repet_Z2 (para_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
ARI_in_repet_Z2 (notrans_results)
+
+
$Rank2
+ Rep1
+Rep1 1
+
+$Rank3
+ Rep1
+Rep1 1
+
+$Rank4
+ Rep1
+Rep1 1
+
+$Rank5
+ Rep1
+Rep1 1
+
+
+Tous les Z sont concordants entre les répétitions, je vais donc sélectionner une seule répétition de chaque pour l’analyse.
+
+Analyse des résultats des LBM
+Ci-après on extrait les premiers modèles.
+
+
seq_model <- purrr:: transpose (seq_results)[[1 ]]
+ para_model <- purrr:: transpose (para_results)[[1 ]]
+ notrans_model <- purrr:: transpose (notrans_results)[[1 ]]
+
+
+
extract_memberships <- function (model) {
+ memberships <- model$ memberships[[which.max (model$ ICL)]]
+ rownames (memberships[["Z1" ]]) <- rownames (model$ adj)
+ rownames (memberships[["Z2" ]]) <- colnames (model$ adj)
+ memberships
+ map_memberships <- list (Z1 = apply (memberships[["Z1" ]], 1 , which.max), Z2 = apply (memberships[["Z2" ]], 1 , which.max))
+ map_memberships
+ }
+
+
+Comparaison des ARI par rang taxonomique
+
+
seq_memberships <- lapply (seq_model, extract_memberships)
+ para_memberships <- lapply (para_model, extract_memberships)
+ notrans_memberships <- lapply (notrans_model, extract_memberships)
+
+
+
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (seq_memberships)[["Z1" ]], .f = ARI)) %>% t () %>%
+ knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for OTU memberships" )
+
+
+ARI for the methods for OTU memberships
+
+
+
+
+
+Para v No transfer
+1
+0.9795031
+1
+1.0000000
+
+
+Sequential v No transfer
+1
+0.9795031
+1
+0.9391289
+
+
+Para v Sequential
+1
+1.0000000
+1
+0.9391289
+
+
+
+
+
+
+
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (seq_memberships)[["Z2" ]], .f = ARI)) %>% t () %>%
+ knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for sample memberships" )
+
+
+ARI for the methods for sample memberships
+
+
+
+
+
+Para v No transfer
+1
+0.9279241
+0.9279268
+1.0000000
+
+
+Sequential v No transfer
+1
+0.9279241
+0.9279268
+0.9756842
+
+
+Para v Sequential
+1
+1.0000000
+1.0000000
+0.9756842
+
+
+
+
+
+
+
library (ggalluvial)
+
+
+ membership_to_df <- function (membership_list, suffix = 1 ) {
+ lapply (membership_list, function (membership_vec) {
+ df <- data.frame (membership_vec)
+ colnames (df) <- paste0 ("Z" , suffix)
+ df %>%
+ rownames_to_column (var = "Rank" ) %>%
+ separate_wider_delim (cols = "Rank" , delim = ";_;" , names_sep = "" )
+ })
+ }
+
+ seq_df <- lapply (seq_along (seq_memberships), function (idx) {
+ membership_to_df (seq_memberships[[idx]], suffix = idx + 1 )
+ })
+
+ seq_df_otu <- lapply (seq_df, function (df) {
+ df$ Z1
+ }) %>% # Below we join by the ranks
+ reduce (left_join) %>%
+ select (sort (names (.)))
+
+
Joining with `by = join_by(Rank1, Rank2)`
+Joining with `by = join_by(Rank1, Rank2, Rank3)`
+Joining with `by = join_by(Rank1, Rank2, Rank3, Rank4)`
+
+
seq_df_sample <- lapply (seq_df, function (df) {
+ df$ Z2
+ }) %>% reduce (left_join, by = "Rank1" )
+
+
+ seq_df_sample_lodes <- to_lodes_form (seq_df_sample, key = "x" , axes = 2 : 5 )
+ seq_df_otu_lodes <- to_lodes_form (seq_df_otu, key = "x" , axes = 6 : 9 )
+
+
+
mach_metadata <- read.table (file = "data/mach/kinetic_sample_metadata.tsv" ) %>% rownames_to_column (var = "Rank1" )
+ chaillou_metadata <- read.table (file = "data/chaillou/sample_metadata.tsv" ) %>% rownames_to_column (var = "Rank1" )
+
+ seq_df_sample_lodes <- left_join (seq_df_sample_lodes, chaillou_metadata)
+
+
Joining with `by = join_by(Rank1)`
+
+
ggplot (seq_df_sample_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+ geom_alluvium (aes (fill = EnvType)) +
+ geom_stratum () +
+ geom_text (stat = "stratum" )
+
+
+
+
+
+
+
+
ggplot (seq_df_otu_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
+ geom_alluvium (aes (fill = Rank2)) +
+ geom_stratum () +
+ geom_text (stat = "stratum" )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/analysis_benchmark_lbm_seq.qmd b/analysis_benchmark_lbm_seq.qmd
index cadfcad..b0a039f 100644
--- a/analysis_benchmark_lbm_seq.qmd
+++ b/analysis_benchmark_lbm_seq.qmd
@@ -11,7 +11,7 @@ library(tidyverse)
library(phyloseq)
library(biomformat)
source("utils.R")
-
+mach_data <- import_biom("data/mach/kinetic.biom")
the_data <- import_biom("data/chaillou/chaillou.biom")
per_taxa_networks <- collapse_otu_at_taxo(the_data)
otu_df <- sapply(per_taxa_networks, nrow) %>%
@@ -20,7 +20,7 @@ otu_df <- sapply(per_taxa_networks, nrow) %>%
rename(Nb_OTU = ".", Rank = "rowname")
-flist <- list.files(here("results", "lbm-seq"), full.names = TRUE, pattern = ".Rds")
+flist <- list.files(here("results", "lbm-seq", "chaillou"), full.names = TRUE, pattern = ".Rds")
para_flist <- grepv(pattern = "para.Rds", flist)
seq_flist <- grepv(pattern = "seq.Rds", flist)
@@ -107,7 +107,7 @@ Tous les Z sont concordants entre les répétitions, je vais donc sélectionner
Ci-après on extrait les premiers modèles.
```{r}
-seq_model <- purrr::transpose(seq_results)[[2]]
+seq_model <- purrr::transpose(seq_results)[[1]]
para_model <- purrr::transpose(para_results)[[1]]
notrans_model <- purrr::transpose(notrans_results)[[1]]
```
@@ -178,11 +178,12 @@ seq_df_otu_lodes <- to_lodes_form(seq_df_otu, key = "x", axes = 6:9)
```{r}
mach_metadata <- read.table(file = "data/mach/kinetic_sample_metadata.tsv") %>% rownames_to_column(var = "Rank1")
+chaillou_metadata <- read.table(file = "data/chaillou/sample_metadata.tsv") %>% rownames_to_column(var = "Rank1")
-seq_df_sample_lodes <- left_join(seq_df_sample_lodes, mach_metadata)
+seq_df_sample_lodes <- left_join(seq_df_sample_lodes, chaillou_metadata)
ggplot(seq_df_sample_lodes, aes(x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
- geom_alluvium(aes(fill = Weaned)) +
+ geom_alluvium(aes(fill = EnvType)) +
geom_stratum() +
geom_text(stat = "stratum")