WOD 8/7

2023-07-08 18:28:43 +02:00 · 2023-07-08 18:28:43 +02:00 · 2f561d57ce
commit 2f561d57ce
parent fff1998904
19 changed files with 73 additions and 144 deletions
--- a/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds
--- a/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds
--- a/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds
--- a/Rcodes/simulation/netclustering_analyze.Rmd
+++ b/Rcodes/simulation/netclustering_analyze.Rmd
@ -1,5 +1,6 @@
 ```{r libraries, echo = FALSE, include = FALSE}
 require("ggplot2")
 require("ggokabeito")
 require("tidyr")
 require("dplyr")
 require("patchwork")
@ -10,119 +11,27 @@ require("latex2exp")
 ```{r impoting-data, echo = FALSE}
 filenames <- list.files(
-    path = "./Rcodes/simulation/data",
+    path = "./data",
-    pattern = "simulated_collection_clustering_*",
+    pattern = "simulated_collection_data_clustering_*",
    full.names = TRUE
 )
 rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
 # data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
-data_list <- lapply(filenames, readRDS)
+df_netclust <- do.call("rbind", lapply(filenames, readRDS))
 ```
-for (model in data_list) {
+```{r netclustering-ARI-boxplot, echo = FALSE}
-            list_clustering <- lapply(
+#| dpi = 300,
-                seq_along(model), function(s) model[[s]]$list_of_clusterings
+#| fig.asp = 0.5,
-            )
+#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
-
+df_netclust %>%
-            list_best_partition <- lapply(
+    ggplot() +
-                seq_along(list_clustering), function(s) {
+    aes(x = as.factor(epsilon), y = ARI) +
-                    list(
+    scale_color_okabe_ito() +
-                        epsilon = result_clustering[[s]]$epsilon,
+    scale_fill_okabe_ito() +
-                        best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
+    xlab(TeX("$\\epsilon$")) +
-                    )
+    guides(fill = guide_legend(title = "Model")) +
-                }
+    ylab("ARI of obtained netclustering") +
-            )
+    geom_boxplot(aes(fill = model))
            data.frame(
                epsilon = sapply(
                    list_best_partition,
                    function(best_partition) {
                        best_partition$epsilon
                    }
                ),
                ARI = sapply(
                    list_best_partition,
                    function(best_partition) {
                        aricode::ARI(rep(1:3, each = 3), unlist(
                            lapply(
                                seq_along(best_partition$best_partition),
                                function(idx) {
                                    setNames(
                                        rep(idx, 
                                        best_partition$best_partition[[idx]]$M), 
                                        best_partition$best_partition[[idx]]$net_id
                                    )
                                }
                            )
                        ))
                    }
                )
            )
 }
 lapply(
    data_list,
    function(model) {
        list_clustering <- lapply(
        seq_along(model), function(s) model[[s]]$list_of_clusterings
        )
        list_best_partition <- lapply(
            seq_along(list_clustering), function(s) {
                list(
                    epsilon = result_clustering[[s]]$epsilon,
                    best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
                )
            }
        )
    data.frame(
        epsilon = sapply(
            list_best_partition,
            function(best_partition) {
                best_partition$epsilon
            }
        ),
        ARI = sapply(
            list_best_partition,
            function(best_partition) {
                aricode::ARI(rep(1:3, each = 3), unlist(
                    lapply(
                        seq_along(best_partition$best_partition),
                        function(idx) {
                            setNames(
                                rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
                            )
                        }
                    )
                ))
            }
        )
    )
 }
 )
    data.frame(
        epsilon = sapply(
            list_best_partition,
            function(best_partition) {
                best_partition$epsilon
            }
        ),
        ARI = sapply(
            list_best_partition,
            function(best_partition) {
                aricode::ARI(rep(1:3, each = 3), unlist(
                    lapply(
                        seq_along(best_partition$best_partition),
                        function(idx) {
                            setNames(
                                rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
                            )
                        }
                    )
                ))
            }
        )
    )
 ```
--- a/Rcodes/simulation/netclustering_analyze.tex
+++ b/Rcodes/simulation/netclustering_analyze.tex
@ -1,13 +1,8 @@
 \section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
-\begin{verbatim}
+\begin{figure}
-## character(0)
+\centering
-\end{verbatim}
+\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
-
+\caption{\label{}ARI of the partition obtained by clustering in function
-\begin{verbatim}
+of \(\eps\)}
-## list()
+\end{figure}
 \end{verbatim}
 \begin{verbatim}
 ## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
 \end{verbatim}
--- a/Rcodes/simulation/netclustering_check.R
+++ b/Rcodes/simulation/netclustering_check.R
@ -1,7 +1,8 @@
 require("ggplot2")
 require("tictoc")
 require("colSBM")
-devtools::load_all("R/")
+set.seed(1234)
 # Generation of conditions
 if (!exists("model_to_test")) {
@ -17,7 +18,7 @@ nc <- 75
 pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
 rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
-epsilons <- c(0.4)
+epsilons <- seq(0.1, 0.4, by = 0.1)
 if (!exists("arg")) {
    arg <- commandArgs(trailingOnly = TRUE)
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {
 conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
-results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
+results <- lapply(seq_len(nrow(conditions)), function(s) {
    eps <- conditions[s, ]$epsilons
    current_pi <- conditions[s, ]$pi
    current_rho <- conditions[s, ]$rho
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_assortative, 3,
-        model = model_to_test
+        model = model_to_test,
        return_memberships = TRUE
    )
    assortative_incidence <- lapply(
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_core_periphery, 3,
-        model = model_to_test
+        model = model_to_test,
        return_memberships = TRUE
    )
    core_periphery_incidence <- lapply(
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_disassortative, 3,
-        model = model_to_test
+        model = model_to_test,
        return_memberships = TRUE
    )
    disassortative_incidence <- lapply(
@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        colsbm_model = model_to_test,
        global_opts = list(
            nb_cores = parallel::detectCores() - 1, verbosity = 2,
-            plot_details = 0#,
+            plot_details = 0 # ,
-            #parallelization_vector = c(FALSE, FALSE, FALSE)
+            # parallelization_vector = c(FALSE, FALSE, FALSE)
        ),
        silent_parallelization = TRUE
    )
-    toc()
+
-    return(
+    best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
-        list(
+    clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
-        epsilon = eps,
+        setNames(
-        repetitions = repetitions,
+            rep(col_idx, best_partitions[[col_idx]]$M),
-        list_of_clusterings = list_collection,
+            best_partitions[[col_idx]]$net_id
        real_block_memberships = list(
            row = real_row_clustering,
            col = real_col_clustering
        )
-    ))
+    }))
-},
+    # ARI computation
-mc.cores = parallel::detectCores() - 1,
+    clustering <- clustering[order(names(clustering))]
-mc.progress = TRUE,
+    ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
-mc.retry = -1
+
    toc()
    cat(paste("Finished", s))
    return(
        data.frame(epsilon = eps, model = model_to_test, ARI = ari)
    )
 }
 # ,
 # mc.cores = parallel::detectCores() - 1,
 # mc.progress = TRUE,
 # mc.retry = -1
 )
-saveRDS(results, file = paste0(
+data_frame_result <- do.call("rbind", results)
 saveRDS(data_frame_result, file = paste0(
    "simulation/data/",
-    "simulated_collection_clustering_",
+    "simulated_collection_data_clustering_",
    model_to_test, "_",
    format(Sys.time(), "%d-%m-%y-%X"),
    ".Rds"
--- a/figure/netclustering-ARI-boxplot-1.png
+++ b/figure/netclustering-ARI-boxplot-1.png
--- a/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
+++ b/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
--- a/img/399307245baa20546f626fa1394d31e9a0eb9241.png
+++ b/img/399307245baa20546f626fa1394d31e9a0eb9241.png
--- a/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
+++ b/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
--- a/img/5164fdb9ce3a2342098367d2af256fced27269f5.png
+++ b/img/5164fdb9ce3a2342098367d2af256fced27269f5.png
--- a/img/99d363f6aa43bf0eba413cb994dc00b130709107.png
+++ b/img/99d363f6aa43bf0eba413cb994dc00b130709107.png
--- a/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
+++ b/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
--- a/img/e1edd7a345d455cbbd86f793df106686f91e258b.png
+++ b/img/e1edd7a345d455cbbd86f793df106686f91e258b.png
--- a/rapport.pdf
+++ b/rapport.pdf
--- a/rapport.tex
+++ b/rapport.tex
@ -32,6 +32,9 @@
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
 \usepackage{booktabs}
 % Figure placement
 \floatplacement{figure}{H}
 %% Tikz Related
 \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
 \usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
 colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
 \chapter{Simulation studies}\label{chap:simulation-studies}
 The below simulations are meant to test the capacities of our models.
 We assess the inference capacities of the algorithm and method, the model
 selection performances and the clustering capacities.
 \paragraph{Reproducibility} All the codes used to obtain data and to perform the
 analysis can be found on the report repository at
 \url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
 \include{Rcodes/simulation/inference_analyze}
 \include{Rcodes/simulation/model_selection_analyze}
 \include{Rcodes/simulation/netclustering_analyze}