here() starts at /home/louis/Documents/Thèse/Axe 3 - Inférence et Microbiote/human-microbiome-compendium
library (stringr)
library (tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ purrr 1.0.4
✔ forcats 1.0.1 ✔ readr 2.1.6
✔ ggplot2 4.0.1 ✔ tibble 3.3.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (phyloseq)
library (biomformat)
source ("utils.R" )
Attachement du package : 'kableExtra'
L'objet suivant est masqué depuis 'package:dplyr':
group_rows
Attachement du package : 'rlang'
Les objets suivants sont masqués depuis 'package:purrr':
%@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
flatten_raw, invoke, splice
the_data <- import_biom ("data/chaillou/chaillou.biom" )
per_taxa_networks <- collapse_otu_at_taxo (the_data)
otu_df <- sapply (per_taxa_networks, nrow) %>%
data.frame () %>%
rownames_to_column () %>%
rename (Nb_OTU = "." , Rank = "rowname" )
flist <- list.files (here ("results" , "lbm-seq" , "chaillou" ), full.names = TRUE , pattern = ".Rds" )
para_flist <- grepv (pattern = "para.Rds" , flist)
seq_flist <- grepv (pattern = "seq.Rds" , flist)
notrans_flist <- grepv (pattern = "notrans.Rds" , flist)
bench_df <- do.call ("rbind" , lapply (flist, function (file) readRDS (file)$ benchmark))
bench_df <- bench_df %>%
mutate (expr = as.character (expr)) %>%
separate_wider_regex (cols = "expr" , patterns = c (Rank = "Rank[0-9]" , type = "para|seq|notrans" )) %>%
mutate (Rank = as.factor (Rank), type = as.factor (type)) %>%
left_join (otu_df, by = "Rank" ) %>%
mutate (Rank = as.factor (Rank), type = as.factor (type))
levels (bench_df$ Rank) <- c ("Phylum" , "Class" , "Order" , "Family" , "Genus" )
library (ggplot2)
coeff <- 220000000000
ggplot (bench_df, aes (x = Rank, col = type)) +
geom_boxplot (aes (y = time)) +
geom_point (aes (y = coeff * Nb_OTU, size = Nb_OTU), shape = 13 ) +
scale_color_manual (values = c ("#363634" , "#009E73" , "#CC79A7" ), labels = c ("No transfer" , "Parallelized" , "Sequential" )) +
scale_y_continuous (sec.axis = sec_axis (~ . / coeff, name = "Number of OTUs" )) +
labs (size = "Number of OTUs" , color = "Algorithm Type" , y = "Time (seconds)" ) +
theme_minimal ()
load_result_list <- function (flist) {
results <- lapply (flist, function (file) readRDS (file)$ models)
names (results) <- paste0 ("Rep" , seq_along (results))
results <- purrr:: transpose (results)
return (results)
}
library (aricode)
ARI_in_repet_Z1 <- function (result_list) {
lapply (result_list, function (rank) {
Z1_list <- lapply (rank, function (repet) {
Z11 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z1, 1 , which.max)
})
outer (X = Z1_list, Y = Z1_list, Vectorize (ARI))
})
}
ARI_in_repet_Z2 <- function (result_list) {
lapply (result_list, function (rank) {
Z2_list <- lapply (rank, function (repet) {
Z2 <- apply (repet$ memberships[[which.max (repet$ ICL)]]$ Z2, 1 , which.max)
})
outer (X = Z2_list, Y = Z2_list, Vectorize (ARI))
})
}
seq_results <- load_result_list (seq_flist)
para_results <- load_result_list (para_flist)
notrans_results <- load_result_list (notrans_flist)
ARI_in_repet_Z1 (seq_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
ARI_in_repet_Z1 (para_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
ARI_in_repet_Z1 (notrans_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
ARI_in_repet_Z2 (seq_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
ARI_in_repet_Z2 (para_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
ARI_in_repet_Z2 (notrans_results)
$Rank2
Rep1
Rep1 1
$Rank3
Rep1
Rep1 1
$Rank4
Rep1
Rep1 1
$Rank5
Rep1
Rep1 1
Tous les Z sont concordants entre les répétitions, je vais donc sélectionner une seule répétition de chaque pour l’analyse.
Analyse des résultats des LBM
Ci-après on extrait les premiers modèles.
seq_model <- purrr:: transpose (seq_results)[[1 ]]
para_model <- purrr:: transpose (para_results)[[1 ]]
notrans_model <- purrr:: transpose (notrans_results)[[1 ]]
extract_memberships <- function (model) {
memberships <- model$ memberships[[which.max (model$ ICL)]]
rownames (memberships[["Z1" ]]) <- rownames (model$ adj)
rownames (memberships[["Z2" ]]) <- colnames (model$ adj)
memberships
map_memberships <- list (Z1 = apply (memberships[["Z1" ]], 1 , which.max), Z2 = apply (memberships[["Z2" ]], 1 , which.max))
map_memberships
}
Comparaison des ARI par rang taxonomique
seq_memberships <- lapply (seq_model, extract_memberships)
para_memberships <- lapply (para_model, extract_memberships)
notrans_memberships <- lapply (notrans_model, extract_memberships)
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z1" ]], purrr:: transpose (notrans_memberships)[["Z1" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z1" ]], purrr:: transpose (seq_memberships)[["Z1" ]], .f = ARI)) %>% t () %>%
knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for OTU memberships" )
ARI for the methods for OTU memberships
Para v No transfer
1
0.9795031
1
1.0000000
Sequential v No transfer
1
0.9795031
1
0.9391289
Para v Sequential
1
1.0000000
1
0.9391289
tibble ("Para v No transfer" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Sequential v No transfer" = map2_dbl (purrr:: transpose (seq_memberships)[["Z2" ]], purrr:: transpose (notrans_memberships)[["Z2" ]], .f = ARI), "Para v Sequential" = map2_dbl (purrr:: transpose (para_memberships)[["Z2" ]], purrr:: transpose (seq_memberships)[["Z2" ]], .f = ARI)) %>% t () %>%
knitr:: kable (col.names = paste0 ("Rank" , seq (2 ,5 )), caption = "ARI for the methods for sample memberships" )
ARI for the methods for sample memberships
Para v No transfer
1
0.9279241
0.9279268
1.0000000
Sequential v No transfer
1
0.9279241
0.9279268
0.9756842
Para v Sequential
1
1.0000000
1.0000000
0.9756842
library (ggalluvial)
membership_to_df <- function (membership_list, suffix = 1 ) {
lapply (membership_list, function (membership_vec) {
df <- data.frame (membership_vec)
colnames (df) <- paste0 ("Z" , suffix)
df %>%
rownames_to_column (var = "Rank" ) %>%
separate_wider_delim (cols = "Rank" , delim = ";_;" , names_sep = "" )
})
}
seq_df <- lapply (seq_along (seq_memberships), function (idx) {
membership_to_df (seq_memberships[[idx]], suffix = idx + 1 )
})
seq_df_otu <- lapply (seq_df, function (df) {
df$ Z1
}) %>% # Below we join by the ranks
reduce (left_join) %>%
select (sort (names (.)))
Joining with `by = join_by(Rank1, Rank2)`
Joining with `by = join_by(Rank1, Rank2, Rank3)`
Joining with `by = join_by(Rank1, Rank2, Rank3, Rank4)`
seq_df_sample <- lapply (seq_df, function (df) {
df$ Z2
}) %>% reduce (left_join, by = "Rank1" )
seq_df_sample_lodes <- to_lodes_form (seq_df_sample, key = "x" , axes = 2 : 5 )
seq_df_otu_lodes <- to_lodes_form (seq_df_otu, key = "x" , axes = 6 : 9 )
mach_metadata <- read.table (file = "data/mach/kinetic_sample_metadata.tsv" ) %>% rownames_to_column (var = "Rank1" )
chaillou_metadata <- read.table (file = "data/chaillou/sample_metadata.tsv" ) %>% rownames_to_column (var = "Rank1" )
seq_df_sample_lodes <- left_join (seq_df_sample_lodes, chaillou_metadata)
Joining with `by = join_by(Rank1)`
ggplot (seq_df_sample_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
geom_alluvium (aes (fill = EnvType)) +
geom_stratum () +
geom_text (stat = "stratum" )
ggplot (seq_df_otu_lodes, aes (x = x, alluvium = alluvium, stratum = stratum, label = stratum)) +
geom_alluvium (aes (fill = Rank2)) +
geom_stratum () +
geom_text (stat = "stratum" )