Removing non breaking spaces

2024-06-07 15:48:53 +02:00 · 2024-06-07 15:48:53 +02:00 · b43618b378
commit b43618b378
parent a9b6ae30d4
27 changed files with 2124 additions and 234 deletions
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-#  MIA Stage 2024
+#  MIA Stage 2024

 Ce dépôt accueille tout le travaille que je vais réaliser pendant mon stage au
 MIA Paris-Saclay en 2024.
--- a/code/analysis/analyze_NA_robustness.Rmd
+++ b/code/analysis/analyze_NA_robustness.Rmd
@ -7,7 +7,7 @@ output:
 ---

 ```{r knitropts, echo = FALSE}
-knitr::opts_chunk$set(fig.width=12L, fig.height=10L)
+knitr::opts_chunk$set(fig.width = 12L, fig.height = 10L)
 ```

 ```{r imports, echo = FALSE, include=FALSE}
@ -19,7 +19,7 @@ loadNamespace(package = "patchwork")
 ```

 ```{r data, echo = FALSE}
-#  Loading data
+#  Loading data
 data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness")
 files_list <- list.files(data_folder, pattern = "^NA_robustness_22-04-2024_17")
 ```
@ -31,13 +31,13 @@ prepare_dataframes_auc_ari <- function(data) {
    sampling <- unique(data[["sampling"]])
    struct <- unique(data[["struct"]])

-    #  Averaging over repetitions
+    #  Averaging over repetitions
    averaged_data <- data %>%
        group_by(prop_NAs, model) %>%
        select(-c(repetition, sampling, struct)) %>%
        summarise_all(list(mean = mean, sd = sd))

-    #  Preparing auc_data
+    #  Preparing auc_data
    auc_data <- averaged_data %>%
        select(c(prop_NAs, model) | contains("auc_")) %>%
        rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
--- a/code/analysis/analyze_inference_bernoulli.qmd
+++ b/code/analysis/analyze_inference_bernoulli.qmd
@ -24,7 +24,7 @@ loadNamespace("kableExtra")
 ```

 ```{r data, echo = FALSE}
-#  Loading data
+#  Loading data
 data_folder <- file.path(here(), "code", "results", "simulations", "inference", "bernoulli")
 # files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024")
 data <- readRDS(file.path(data_folder, "bernoulli_inference_06-05-2024_17-21-48_1-972.Rds"))
@ -62,7 +62,7 @@ print_data <- av_data %>%
    group_by(epsilon_alpha) %>%
    round(2) %>%
    mutate(
-        #  Model comparison
+        #  Model comparison
        pirho_over_sep = pirho_over_sep_mean,
        pirho_over_iid = pirho_over_iid_mean,
        pirho_over_pi = pirho_over_pi_mean,
@ -77,7 +77,7 @@ print_data <- av_data %>%
        pirho_Q2_over_4 = pirho_Q2_over_4_mean,


-        #  Grouping accuracy
+        #  Grouping accuracy
        pirho_mean_row_ARI = paste(pirho_mean_row_ARI_mean,
            pirho_mean_row_ARI_sd,
            sep = "\\pm"
@ -132,7 +132,7 @@ ft_1 <- flextable(print_data) |>
        opts_html = list(
            scroll = list(freeze_first_column = TRUE)
        )
-    ) |> 
+    ) |>
    compose(j = "pirho_mean_row_ARI", value = as_paragraph(as_equation(pirho_mean_row_ARI)))
 ft_1 <- compose(ft_1, j = "pirho_mean_col_ARI", value = as_paragraph(as_equation(pirho_mean_col_ARI)))
 ft_1 <- compose(ft_1, j = "pirho_double_row_ARI", value = as_paragraph(as_equation(pirho_double_row_ARI)))
@ -143,7 +143,7 @@ ft_1 <- ft_1 |> mk_par(
        as_equation(col_keys, width = .1, height = .2)
    )
 )
-theme_zebra(ft_1) |> 
+theme_zebra(ft_1) |>
    hline(i = 1, border = fp_border_default(), part = "header") |>
    vline(j = 1, border = fp_border_default(), part = "all") |>
    bg(j = 1, bg = "#DDDDDD", part = "all") |>
--- a/code/analysis/investigating/invest_clust_perf.R
+++ b/code/analysis/investigating/invest_clust_perf.R
@ -6,9 +6,9 @@ set.seed(1234, "L'Ecuyer-CMRG")
 data(dorebipartite)
 str(dorebipartite)

-nb_cores = parallelly::availableCores()
-concurrent_models <- min(length(seq(1,4)), nb_cores%/%2)
-per_model <- (nb_cores - concurrent_models) %/% length(seq(1,4)) 
+nb_cores <- parallelly::availableCores()
+concurrent_models <- min(length(seq(1, 4)), nb_cores %/% 2)
+per_model <- (nb_cores - concurrent_models) %/% length(seq(1, 4))

 mb1 <- microbenchmark(
    "fancy computation" = {
@ -44,4 +44,4 @@ mb2 <- microbenchmark(
    times = 5L
 )

-# Bilan : plus intéressant de bourriner
+# Bilan : plus intéressant de bourriner
--- a/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd
+++ b/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd
@ -44,7 +44,7 @@ file_list <- list.files(data_folder)
 file <- file_list[[1]]
 data <- readRDS(file.path(data_folder, file))

-#  Extracting data to netlist and Z
+#  Extracting data to netlist and Z
 netlist_memb <- data[["netlist"]]
 netlist <- lapply(netlist_memb, function(full) full[["incidence_matrix"]])
 nr <- nrow(netlist[[1]])
@ -58,8 +58,6 @@ joined_col_memberships <- unlist(col_blockmemberships)

 row_taus <- lapply(row_blockmemberships, function(Z1_m) colSBM:::.one_hot(Z1_m, 4))
 col_taus <- lapply(col_blockmemberships, function(Z2_m) colSBM:::.one_hot(Z2_m, 4))
-
-
 ```

 ```{r parameters, echo = FALSE}
@ -110,7 +108,7 @@ ARI(joined_col_memberships, good_joined_col)

 ```{r, echo = FALSE}
 wrap_plots(
-    plot_alpha(data[["alpha"]])+ labs(caption = "Vrai alpha"),
+    plot_alpha(data[["alpha"]]) + labs(caption = "Vrai alpha"),
    plot(good_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,4)"),
    plot(bad_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,5)")
 )
@ -143,18 +141,20 @@ Ici on clone le modèle et on lui donne les bons paramètres pour voir s'il fait
 mieux que celui trouvé avant.

 ```{r testing_true_params, echo = FALSE}
-# Un clone
+# Un clone
 good_clone <- good_model$clone()

-# Les vrais paramètres
+# Les vrais paramètres
 alpha <- data[["alpha"]]
 taus <- lapply(seq_along(row_taus), function(m) {
-    list(row = row_taus[[m]],
-        col = col_taus[[m]])
+    list(
+        row = row_taus[[m]],
+        col = col_taus[[m]]
+    )
 })
 # Pi params
 pi1 <- as.vector(data[["pi1"]])
-pi1[1] <- pi1[1] + 1e-9 
+pi1[1] <- pi1[1] + 1e-9
 rho1 <- rep(0.25, 4)
 pi2 <- rep(0.25, 4)
 rho2 <- as.vector(data[["rho2"]])
@ -165,15 +165,17 @@ good_clone[["tau"]] <- taus
 good_clone[["pi"]] <- pi
 good_clone[["alpha"]] <- alpha

-result_BICL <- c(bad_BICL = bad_model$compute_BICL(),
-good_model_BICL = good_model$compute_BICL(),
-good_clone_BICL = good_clone$compute_BICL())
+result_BICL <- c(
+    bad_BICL = bad_model$compute_BICL(),
+    good_model_BICL = good_model$compute_BICL(),
+    good_clone_BICL = good_clone$compute_BICL()
+)
 print(result_BICL)
 ```

 On vient mettre à la place du 4,4 le modèle avec les vrais paramètres.
 Et on espère voir la bonne information se diffuser.
 ```{r adjusting, echo = FALSE}
-fitted_bisbmpop_pirho$model_list[[4,4]] <- good_clone
-fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4,4), depth = 1L, nb_pass = 1L)
+fitted_bisbmpop_pirho$model_list[[4, 4]] <- good_clone
+fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4, 4), depth = 1L, nb_pass = 1L)
 ```
--- a/code/analysis/investigating/invest_split_clust.R
+++ b/code/analysis/investigating/invest_split_clust.R
@ -2,7 +2,7 @@ library(here)

 inv_folder <- file.path(here(), "code", "results", "investigating", "split_clust")

-for(file in list.files(inv_folder)) {
+for (file in list.files(inv_folder)) {
    load(file.path(inv_folder, file))
 }

@ -16,10 +16,9 @@ noname_X <- as.matrix(noname_X)

 colSBM:::split_clust(noname_X, Z, Q = 1, is_bipartite = TRUE)

-alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1
-)[[1]]
+alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1)[[1]]
 colSBM:::split_clust(alt_X, Z, Q = 1, is_bipartite = TRUE)


-# La solution était que X était regardé comme une table et unique marche 
-# différemment 
+# La solution était que X était regardé comme une table et unique marche
+# différemment
--- a/code/applications/base_analysis.qmd
+++ b/code/applications/base_analysis.qmd
@ -0,0 +1,133 @@
+### Modèle  ${{model}}$
+
+```{r}
+library(here)
+
+library(colSBM)
+
+library(dplyr)
+library(tidyr)
+
+library(plotly)
+library(ggplot2)
+library(ggforce)
+library(ggrepel)
+library(latex2exp)
+```
+```{r}
+list_collection <- readRDS("{{clustering}}")
+unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
+
+graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
+summarized_graph_size_df <- graph_size_df %>%
+    group_by(collection_id) %>%
+    select(-net_id) %>%
+    summarise(
+        M = n(),
+        nr_mean = mean(nr),
+        nr_sd = ifelse(length(nr) > 1, sd(nr), 0),
+        nc_mean = mean(nc),
+        nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
+        Qr = first(Qr),
+        Qc = first(Qc)
+    )
+```
+
+:::{.panel-tabset}
+#### Analyse des tailles de cluster
+
+```{r}
+filtered_summarized_graph_size_df <- summarized_graph_size_df
+ggplot(filtered_summarized_graph_size_df) +
+    aes(x = Qc, y = Qr, color = collection_id, label = M) +
+    geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_text_repel() +
+    xlab(TeX("$Q_c$")) +
+    ylab(TeX("$Q_r$")) +
+    coord_fixed()
+
+plot_ly(x = filtered_summarized_graph_size_df$Qc, 
+    y = filtered_summarized_graph_size_df$Qr, 
+    z = filtered_summarized_graph_size_df$M,
+    type = "scatter3d", mode = "markers",
+    color = filtered_summarized_graph_size_df$collection_id)  %>% 
+    layout(scene = list(xaxis = list(title = "Q_c"), 
+        yaxis = list(title = "Q_r")))
+```
+
+#### Analyse des tailles de réseaux
+
+```{r}
+ggplot(filtered_summarized_graph_size_df) +
+    aes(x = nc_mean, y = nr_mean, color = collection_id) +
+    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_ellipse(aes(
+        x0 = nc_mean, y0 = nr_mean,
+        a = 0.5 * nc_sd,
+        b = 0.5 * nr_sd,
+        fill = collection_id, angle = 0
+    ), alpha = 0.1) +
+    geom_text_repel(aes(label = collection_id)) +
+    xlab(TeX("$n_c$")) +
+    ylab(TeX("$n_r$")) +
+    coord_fixed() +
+    ggtitle("Distribution des tailles moyennes et écart-types")
+
+max_nr <- 200
+max_nc <- 200
+
+ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
+    aes(x = nc_mean, y = nr_mean, color = collection_id) +
+    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_ellipse(aes(
+        x0 = nc_mean, y0 = nr_mean,
+        a = 0.5 * nc_sd,
+        b = 0.5 * nr_sd,
+        fill = collection_id, angle = 0
+    ), alpha = 0.1) +
+    geom_text_repel(aes(label = collection_id)) +
+    xlab(TeX("$n_c$")) +
+    ylab(TeX("$n_r$")) +
+    coord_fixed() +
+    ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
+
+filtered_graph_size <- graph_size_df
+ggplot(filtered_graph_size) +
+    aes(x = collection_id, y = nr, fill = collection_id) +
+    geom_boxplot()
+ggplot(filtered_graph_size) +
+    aes(x = collection_id, y = nc, fill = collection_id) +
+    geom_boxplot()
+
+ggplot(filtered_graph_size) +
+    aes(x = collection_id, y = nr + nc, fill = collection_id) +
+    geom_boxplot()
+
+ggplot(filtered_graph_size) +
+    aes(x = collection_id, y = nr * nc, fill = collection_id) +
+    geom_boxplot()
+```
+
+:::
+
+#### Structure des collections
+
+```{r}
+#| output: asis
+for (i in seq_len(length(unlisted_best_partition))) {
+    cat("\n##### Structure collection ", i, "\n")
+    current_col <- unlisted_best_partition[[i]]$clone()
+    current_col$net_id <- current_col$net_id %>%
+        stringr::str_trunc(width = 20L)
+    try({
+        p <- plot(
+            current_col,
+            type = "meso",
+            values = FALSE, mixture = TRUE
+        ) +
+            ggtitle(paste0("Structure\ncollection ", i))
+        print(p)
+    })
+}
+
+```
--- a/code/applications/dore/01_dore_clusterize.R
+++ b/code/applications/dore/01_dore_clusterize.R
@ -24,7 +24,7 @@ if (length(arg) == 0L) {
    }
 }

-#  Arguments checks
+#  Arguments checks
 allowed_model <- c("iid", "pi", "rho", "pirho")
 stopifnot(
    "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
--- a/code/applications/dore/02_dore_analysis.html
+++ b/code/applications/dore/02_dore_analysis.html
--- a/code/applications/dore/02_dore_analysis.qmd
+++ b/code/applications/dore/02_dore_analysis.qmd
@ -1,52 +1,46 @@
 ---
+format: html
+title: Clustering avec `colSBM` des données 
+execute: 
+  echo: false
+  warning: false
 ---

+# Analyse
+
 ```{r}
 library(colSBM)
 library(here)
+library(stringr)
 ```

 ```{r}
 root_app_folder <- file.path(here(), "code", "applications")
 source(file.path(root_app_folder, "utils.R"))
 data_folder <- file.path(here(), "code", "results", "applications", "dore")
-files_vec <- get_recent_files(data_folder)
+files_vec <- get_recent_files(data_folder, n = 16)
 files_vec <- identify_models(files_vec)
-list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
-
-best_partition <- extract_best_bipartite_partition(list_collection)
-length(unlist(best_partition))
-unlisted_bp <- unlist(best_partition)
+list_clustering <- files_vec
+# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
 ```

-```{r}
-library(stringr)
+## Analyse par modèle

-# string_split <- function(string,size) {
-#     unlist(str_extract_all(string, paste0('.{1,',size,'}')))
-# }
+:::{.panel-tabset}

-# custom_str_wrap <- function(str, width = 30) {
-#     if (nchar(str) > width) {
-#         return(paste(string_split(str, size = width), collapse = "\n"))
-#     }
-#     return(str)
-# }
+```{r write_tabs}
+#| warning: false
+#| output: asis

-iid_BICL <- sum(sapply(unlisted_bp, function(model) model$BICL))
-
-#  Shortening net names
-for (idx in seq_len(length(unlisted_bp))) {
-    unlisted_bp[[idx]]$net_id <- sapply(unlisted_bp[[idx]]$net_id, function(id) str_trunc(id,20))
-}
-
-```
-
-```{r}
-for (idx in seq_len(length(unlisted_bp))) {
-    print(plot(unlisted_bp[[idx]], type = "meso", mixture = TRUE, values = TRUE))
+# Generate content for each model using knit_expand
+for (clustering_idx in seq_len(length(list_clustering))) {
+    clustering <- list_clustering[clustering_idx]
+    model <- names(clustering)
+    expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
+    res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
+    cat(res, sep = "\n")
+    cat("\n")
 }
 ```

-Analyser par LBM :
-3, 
+:::
--- a/code/applications/herbivores/02_herbivores_clustering.R
+++ b/code/applications/herbivores/02_herbivores_clustering.R
@ -28,7 +28,7 @@ if (length(arg) == 0L) {
    }
 }

-#  Arguments checks
+#  Arguments checks
 allowed_model <- c("iid", "pi", "rho", "pirho")
 stopifnot(
    "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model)
--- a/code/applications/herbivores/03_herbivores_analysis.html
+++ b/code/applications/herbivores/03_herbivores_analysis.html
--- a/code/applications/herbivores/03_herbivores_analysis.qmd
+++ b/code/applications/herbivores/03_herbivores_analysis.qmd
@ -3,8 +3,18 @@ title: Analysis of herbivores data
 execute: 
  echo: false
  freeze: auto
+  warning: false
 ---

+```{r import_data}
+root_app_folder <- file.path(here(), "code", "applications")
+source(file.path(root_app_folder, "utils.R"))
+data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
+files_vec <- get_recent_files(data_folder)
+filepath <- files_vec[1]
+list_collection <- readRDS(filepath)
+```
+
 ```{r libs_and_data}
 library(here)

@ -12,21 +22,14 @@ library(colSBM)

 library(dplyr)
 library(tidyr)
+
+library(plotly)
 library(ggplot2)
 library(ggforce)
-library(weird)
-
-root_app_folder <- file.path(here(), "code", "applications")
-source(file.path(root_app_folder, "utils.R"))
-data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
-files_vec <- get_recent_files(data_folder)
-# files_vec <- identify_models(files_vec)
+library(ggrepel)
+library(latex2exp)
 ```
-
-```{r}
-filepath <- files_vec[1]
-list_collection <- readRDS(filepath)
-
+```{r dataformatting}
 unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))

 graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
@ -41,39 +44,72 @@ summarized_graph_size_df <- graph_size_df %>%
        nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
        Qr = first(Qr),
        Qc = first(Qc)
-)
+    )
+```
+### Analyse des tailles de cluster
+```{r collection_blocks_plot_size}
+#| layout-ncol: 2
+#| plotly: true
+filtered_summarized_graph_size_df <- summarized_graph_size_df
+ggplot(filtered_summarized_graph_size_df) +
+    aes(x = Qc, y = Qr, color = collection_id, label = M) +
+    geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_text_repel() +
+    xlab(TeX("$Q_c$")) +
+    ylab(TeX("$Q_r$")) +
+    coord_fixed()
+
+plot_ly(x = filtered_summarized_graph_size_df$Qc, 
+    y = filtered_summarized_graph_size_df$Qr, 
+    z = filtered_summarized_graph_size_df$M,
+    type = "scatter3d", mode = "markers",
+    color = filtered_summarized_graph_size_df$collection_id)  %>% 
+    layout(scene = list(xaxis = list(title = "Q_c"), 
+        yaxis = list(title = "Q_r")))
 ```

-```{r collection_size_boxplot}
-filtered_graph_size <- graph_size_df %>% filter(M > 5)
+### Analyse des tailles de réseaux
+
+```{r size_plot}
+#| layout-ncol: 2
+ggplot(filtered_summarized_graph_size_df) +
+    aes(x = nc_mean, y = nr_mean, color = collection_id) +
+    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_ellipse(aes(
+        x0 = nc_mean, y0 = nr_mean,
+        a = 0.5 * nc_sd,
+        b = 0.5 * nr_sd,
+        fill = collection_id, angle = 0
+    ), alpha = 0.1) +
+    geom_text_repel(aes(label = collection_id)) +
+    xlab(TeX("$n_c$")) +
+    ylab(TeX("$n_r$")) +
+    coord_fixed() +
+    ggtitle("Distribution des tailles moyennes et écart-types") 
+
+max_nr <- 200
+max_nc <- 200
+
+ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
+    aes(x = nc_mean, y = nr_mean, color = collection_id) +
+    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
+    geom_ellipse(aes(
+        x0 = nc_mean, y0 = nr_mean,
+        a = 0.5 * nc_sd,
+        b = 0.5 * nr_sd,
+        fill = collection_id, angle = 0
+    ), alpha = 0.1) +
+    geom_text_repel(aes(label = collection_id)) +
+    xlab(TeX("$n_c$")) +
+    ylab(TeX("$n_r$")) +
+    coord_fixed() +
+    ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
+
+filtered_graph_size <- graph_size_df %>% filter(M > 1)
 ggplot(filtered_graph_size) +
    aes(x = collection_id, y = nr, fill = collection_id) +
    geom_boxplot()
 ggplot(filtered_graph_size) +
    aes(x = collection_id, y = nc, fill = collection_id) +
    geom_boxplot()
-
-# ggplot(filtered_graph_size) +
-#     aes(x = nc, y = nr, fill = collection_id) +
-gg_bagplot(filtered_graph_size, x = filtered_graph_size[["nc"]], y = filtered_graph_size[["nr"]])
-```
-
-```{r collection_blocks_plot}
-filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
-ggplot(filtered_summarized_graph_size_df) +
-    aes(x = Qc, y = Qr, color = collection_id) +
-    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
-    geom_circle(aes(x0 = Qc, y0 = Qr, r = 0.025 * M, fill = collection_id), alpha = 0.1) +
-    coord_fixed()
-
-```
-
-```{r collection_size_mean}
-filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
-ggplot(filtered_summarized_graph_size_df) +
-    aes(x = nc_mean, y = nr_mean, color = collection_id) +
-    geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
-    geom_ellipse(aes(x0 = nc_mean, y0 = nr_mean, a = 0.5 * nc_sd, b = 0.5 * nr_sd, fill = collection_id, angle = 0), alpha = 0.1) +
-    coord_fixed()
-
 ```
--- a/code/applications/utils.R
+++ b/code/applications/utils.R
@ -8,8 +8,12 @@
 #'
 #' @return A vector of size `n` with the file path.
 get_recent_files <- function(data_folder, n = 4) {
-    files_info <- file.info(file.path(data_folder, list.files(data_folder)))
-    files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder))
+    files_info <- file.info(file.path(data_folder, list.files(data_folder,
+        include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
+    )))
+    files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder,
+        include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
+    ))
    files_info <- sort_by(files_info, files_info[["ctime"]], decreasing = TRUE)
    if (n > nrow(files_info)) {
        warning(
@ -23,17 +27,10 @@ get_recent_files <- function(data_folder, n = 4) {

 #' Identify models
 identify_models <- function(files_vec) {
-    iid_id <- grep(pattern = "iid", files_vec, fixed = TRUE)
-    pi_id <- grep(pattern = "pi_", files_vec, fixed = TRUE)
-    rho_id <- grep(pattern = "_rho", files_vec, fixed = TRUE)
-    pirho_id <- grep(pattern = "pirho", files_vec, fixed = TRUE)
-    file_order <- c(iid_id, pi_id, rho_id, pirho_id)
-    stopifnot(
-        "There are multiple files matching models !" =
-            length(file_order) <= 4L
+    names(files_vec) <- stringr::str_extract(
+        string = files_vec,
+        pattern = "(iid|pirho|pi|rho)"
    )
-    files_vec <- files_vec[file_order]
-    names(files_vec) <- c("iid", "pi", "rho", "pirho")
    return(files_vec)
 }

@ -50,4 +47,4 @@ build_graph_size_dataframe <- function(collection_list) {
            Qc = collection[["Q"]][[2]]
        )
    }))
-}
+}
--- a/code/applications/weboflife/02_weboflife_clustering.R
+++ b/code/applications/weboflife/02_weboflife_clustering.R
@ -37,7 +37,7 @@ if (length(arg) == 0L) {
    }
 }

-#  Arguments checks
+#  Arguments checks
 allowed_model <- c("iid", "pi", "rho", "pirho")
 stopifnot(
    "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
--- a/code/data/simulations/clustering/synthetic/synthetic_iid_seed_1234.Rds
+++ b/code/data/simulations/clustering/synthetic/synthetic_iid_seed_1234.Rds
--- a/code/results/applications/dore/dore_collection_iid_28-05-24_15-39-23.Rds
+++ b/code/results/applications/dore/dore_collection_iid_28-05-24_15-39-23.Rds
--- a/code/results/applications/dore/dore_collection_pi_28-05-24_19-04-47.Rds
+++ b/code/results/applications/dore/dore_collection_pi_28-05-24_19-04-47.Rds
--- a/code/results/applications/dore/dore_collection_pirho_29-05-24_00-02-48.Rds
+++ b/code/results/applications/dore/dore_collection_pirho_29-05-24_00-02-48.Rds
--- a/code/results/applications/dore/dore_collection_rho_28-05-24_21-47-48.Rds
+++ b/code/results/applications/dore/dore_collection_rho_28-05-24_21-47-48.Rds
--- a/code/scripts/migale_application_herbivores.sh
+++ b/code/scripts/migale_application_herbivores.sh
@ -10,7 +10,7 @@
 #$ -o logs/$JOB_NAME.$TASK_ID
 #$ -e logs/$JOB_NAME.$TASK_ID

-# Creating log directory if it doesn't exists
+# Creating log directory if it doesn't exists
 BASE_DIR="/home/$USER/work/mia-stage-2024"
 LOG_DIR=$(echo "$BASE_DIR/logs")

@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
    mkdir -p $LOG_DIR
 fi

-# Constant data
+# Constant data
 MODELARRAY=("iid" "pi" "rho" "pirho")

 MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}

-# Finding directory
+# Finding directory
 APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")

 echo $APPLICATIONS_DIR
--- a/code/scripts/migale_application_weboflife.sh
+++ b/code/scripts/migale_application_weboflife.sh
@ -10,7 +10,7 @@
 #$ -o logs/$JOB_NAME.$TASK_ID
 #$ -e logs/$JOB_NAME.$TASK_ID

-# Creating log directory if it doesn't exists
+# Creating log directory if it doesn't exists
 BASE_DIR="/home/$USER/work/mia-stage-2024"
 LOG_DIR=$(echo "$BASE_DIR/logs")

@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
    mkdir -p $LOG_DIR
 fi

-# Constant data
+# Constant data
 MODELARRAY=("iid" "pi" "rho" "pirho")

 MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}

-# Finding directory
+# Finding directory
 APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")

 echo $APPLICATIONS_DIR
--- a/code/scripts/migale_simulations_NA_robustness.sh
+++ b/code/scripts/migale_simulations_NA_robustness.sh
@ -10,9 +10,9 @@
 #$ -o logs/$JOB_NAME.$TASK_ID
 #$ -e logs/$JOB_NAME.$TASK_ID

-# Creating log directory if it doesn't exists
+# Creating log directory if it doesn't exists

-# Constant data
+# Constant data
 STRUCTA=("nested" "modular")
 SAMPLINGA=("uniform" "row" "col" "rowcol")

@ -25,11 +25,11 @@ if [ ! -d "$LOG_DIR" ]; then
    mkdir -p $LOG_DIR
 fi

-# Finding simulations directory
+# Finding simulations directory
 SIMULATIONS_DIR=$(echo "$BASE_DIR/code/simulations")

 echo $SIMULATIONS_DIR

-# Parsing sge array id
+# Parsing sge array id

 Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct $STRUCT
--- a/code/simulations/simulations_NA_robustness.R
+++ b/code/simulations/simulations_NA_robustness.R
@ -28,7 +28,7 @@ if (length(arg) == 0L) {
    }
 }

-#  Arguments checks
+#  Arguments checks
 allowed_struct <- c("modular", "nested")

 stopifnot(
@ -70,49 +70,61 @@ max_repetition <- 10L

 # Collections
 collections <- list(
-    "iid" = c(generate_bipartite_collection(nr1, nc1,
-        pir, pic,
-        alpha, 1,
-        model = "iid",
-        return_memberships = TRUE),
+    "iid" = c(
+        generate_bipartite_collection(nr1, nc1,
+            pir, pic,
+            alpha, 1,
+            model = "iid",
+            return_memberships = TRUE
+        ),
        generate_bipartite_collection(nr2, nc2,
-        pir, pic,
-        alpha, M-1,
-        model = "iid",
-        return_memberships = TRUE)
+            pir, pic,
+            alpha, M - 1,
+            model = "iid",
+            return_memberships = TRUE
+        )
    ),
-    "pi" = c(generate_bipartite_collection(nr1, nc1,
-        pir, pic,
-        alpha, 1,
-        model = "pi",
-        return_memberships = TRUE),
+    "pi" = c(
+        generate_bipartite_collection(nr1, nc1,
+            pir, pic,
+            alpha, 1,
+            model = "pi",
+            return_memberships = TRUE
+        ),
        generate_bipartite_collection(nr2, nc2,
-        pir, pic,
-        alpha, M-1,
-        model = "pi",
-        return_memberships = TRUE)
+            pir, pic,
+            alpha, M - 1,
+            model = "pi",
+            return_memberships = TRUE
+        )
    ),
-    "rho" = c(generate_bipartite_collection(nr1, nc1,
-        pir, pic,
-        alpha, 1,
-        model = "rho",
-        return_memberships = TRUE),
+    "rho" = c(
+        generate_bipartite_collection(nr1, nc1,
+            pir, pic,
+            alpha, 1,
+            model = "rho",
+            return_memberships = TRUE
+        ),
        generate_bipartite_collection(nr2, nc2,
-        pir, pic,
-        alpha, M-1,
-        model = "rho",
-        return_memberships = TRUE)
+            pir, pic,
+            alpha, M - 1,
+            model = "rho",
+            return_memberships = TRUE
+        )
    ),
-    "pirho" = c(generate_bipartite_collection(nr1, nc1,
-        pir, pic,
-        alpha, 1,
-        model = "pirho",
-        return_memberships = TRUE),
+    "pirho" = c(
+        generate_bipartite_collection(nr1, nc1,
+            pir, pic,
+            alpha, 1,
+            model = "pirho",
+            return_memberships = TRUE
+        ),
        generate_bipartite_collection(nr2, nc2,
-        pir, pic,
-        alpha, M-1,
-        model = "pirho",
-        return_memberships = TRUE)
+            pir, pic,
+            alpha, M - 1,
+            model = "pirho",
+            return_memberships = TRUE
+        )
    )
 )

@ -124,7 +136,7 @@ conditions <- expand.grid(
 )


-#  Data params
+#  Data params
 main_dir <- file.path("code", "results", "simulations", "NA_robustness")

 if (!dir.exists(main_dir)) {
@ -142,7 +154,7 @@ if (!dir.exists(temp_dir)) {
 }

 file_save <- file.path(main_dir, paste0(
-    "NA_robustness_", start_time, "_", sampling, 
+    "NA_robustness_", start_time, "_", sampling,
    "_", struct, "_1-", nrow(conditions), ".Rds"
 ))

@ -179,7 +191,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {

            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)

-            #  Computes the index as a single number, R stores matrices by column
+            #  Computes the index as a single number, R stores matrices by column
            (NAs_selected_index_exp[["col"]] - 1) *
                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
        },
@ -190,7 +202,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {

            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)

-            #  Computes the index as a single number, R stores matrices by column
+            #  Computes the index as a single number, R stores matrices by column
            (NAs_selected_index_exp[["col"]] - 1) *
                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
        },
@ -201,7 +213,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
            col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)

-            #  Computes the index as a single number, R stores matrices by column
+            #  Computes the index as a single number, R stores matrices by column
            (NAs_selected_index_exp[["col"]] - 1) *
                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
        }
@ -269,14 +281,13 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
            mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
        ),
        elapsed_secs = difftime(stop_time, start_time, units = "sec"),
-
        sampling = sampling,
        struct = struct
    )

    message("Finished step ", current, "/", nrow(conditions), "\n")

-    #  Saving temp
+    #  Saving temp
    temp_file_save <- file.path(temp_dir, paste0(
        "conditions_", current, "_on_",
        nrow(conditions), ".Rds"
@ -296,4 +307,4 @@ saveRDS(
    result_dataframe,
    file = file_save
 )
-message("Finished simulations.")
+message("Finished simulations.")
--- a/code/simulations/simulations_inference_bernoulli.R
+++ b/code/simulations/simulations_inference_bernoulli.R
@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])

 conditions <- conditions[choosed_conditions, ]

-#  Data params
+#  Data params
 main_dir <- file.path("code", "results", "simulations", "inference", "bernoulli")

 if (!dir.exists(main_dir)) {
@ -205,7 +205,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {

  stop_time_condition <- Sys.time()

-  ##  Preparing date for export
+  ##  Preparing date for export
  # BICLs
  sep_BICL <- sum(sep_BiSBM$BICL)
  iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
@ -318,7 +318,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
  )
  message("Finished step ", s, "/", nrow(conditions))

-  #  Saving temp
+  #  Saving temp
  temp_file_save <- file.path(temp_dir, paste0(
    "conditions_", s, "_on_",
    nrow(conditions), ".Rds"
@ -326,7 +326,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {

  saveRDS(object = data_frame_output, file = temp_file_save)

-  #  Saving inhabitual data
+  #  Saving inhabitual data
  if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
    warning("Incorrect result encountered, saving.")
    incorrect_filepath <- file.path(temp_dir, paste0(
@ -358,4 +358,4 @@ full_data_frame <- do.call(rbind, results)
 saveRDS(full_data_frame,
  file = file_save
 )
-message("Finished simulations.")
+message("Finished simulations.")
--- a/code/simulations/simulations_inference_poisson.R
+++ b/code/simulations/simulations_inference_poisson.R
@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])

 conditions <- conditions[choosed_conditions, ]

-#  Data params
+#  Data params
 main_dir <- file.path("code", "results", "simulations", "inference", "poisson")

 if (!dir.exists(main_dir)) {
@ -204,7 +204,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {

  stop_time_condition <- Sys.time()

-  ##  Preparing date for export
+  ##  Preparing date for export
  # BICLs
  sep_BICL <- sum(sep_BiSBM$BICL)
  iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
@ -317,7 +317,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
  )
  message("Finished step ", s, "/", nrow(conditions))

-  #  Saving temp
+  #  Saving temp
  temp_file_save <- file.path(temp_dir, paste0(
    "conditions_", s, "_on_",
    nrow(conditions), ".Rds"
@ -325,7 +325,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {

  saveRDS(object = data_frame_output, file = temp_file_save)

-  #  Saving inhabitual data
+  #  Saving inhabitual data
  if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
    warning("Incorrect result encountered, saving.")
    incorrect_filepath <- file.path(temp_dir, paste0(
@ -357,4 +357,4 @@ full_data_frame <- do.call(rbind, results)
 saveRDS(full_data_frame,
  file = file_save
 )
-message("Finished simulations.")
+message("Finished simulations.")
--- a/code/simulations/simulations_model_selection.R
+++ b/code/simulations/simulations_model_selection.R
@ -72,7 +72,7 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) {
 choosed_conditions <- seq.int(from = arg[1], to = arg[2])
 conditions <- conditions[choosed_conditions, ]

-#  Data params
+#  Data params
 main_dir <- file.path("code", "results", "simulations", "model_selection")

 if (!dir.exists(main_dir)) {
@ -246,7 +246,7 @@ with_progress({
        )
        message("Finished step ", s, "/", nrow(conditions))

-        #  Saving temp
+        #  Saving temp
        temp_file_save <- file.path(temp_dir, paste0(
            "conditions_", s, "_on_",
            nrow(conditions), ".Rds"
@ -264,4 +264,4 @@ full_data_frame <- do.call(rbind, results)
 saveRDS(full_data_frame,
    file = file_save
 )
-message("Finished simulations.")
+message("Finished simulations.")