WOD 8/7

2023-07-08 18:28:43 +02:00 · 2023-07-08 18:28:43 +02:00 · 2f561d57ce
commit 2f561d57ce
parent fff1998904
19 changed files with 73 additions and 144 deletions
--- a/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds
--- a/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds
--- a/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds
+++ b/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds
--- a/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds
+++ b/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds
--- a/Rcodes/simulation/netclustering_analyze.Rmd
+++ b/Rcodes/simulation/netclustering_analyze.Rmd
@ -1,5 +1,6 @@
 ```{r libraries, echo = FALSE, include = FALSE}
 require("ggplot2")
+require("ggokabeito")
 require("tidyr")
 require("dplyr")
 require("patchwork")
@ -10,119 +11,27 @@ require("latex2exp")

 ```{r impoting-data, echo = FALSE}
 filenames <- list.files(
-    path = "./Rcodes/simulation/data",
-    pattern = "simulated_collection_clustering_*",
+    path = "./data",
+    pattern = "simulated_collection_data_clustering_*",
    full.names = TRUE
 )

-rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
-
 # data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
-data_list <- lapply(filenames, readRDS)
+df_netclust <- do.call("rbind", lapply(filenames, readRDS))

-
-for (model in data_list) {
-            list_clustering <- lapply(
-                seq_along(model), function(s) model[[s]]$list_of_clusterings
-            )
-
-            list_best_partition <- lapply(
-                seq_along(list_clustering), function(s) {
-                    list(
-                        epsilon = result_clustering[[s]]$epsilon,
-                        best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
-                    )
-                }
-            )
-            data.frame(
-                epsilon = sapply(
-                    list_best_partition,
-                    function(best_partition) {
-                        best_partition$epsilon
-                    }
-                ),
-                ARI = sapply(
-                    list_best_partition,
-                    function(best_partition) {
-                        aricode::ARI(rep(1:3, each = 3), unlist(
-                            lapply(
-                                seq_along(best_partition$best_partition),
-                                function(idx) {
-                                    setNames(
-                                        rep(idx, 
-                                        best_partition$best_partition[[idx]]$M), 
-                                        best_partition$best_partition[[idx]]$net_id
-                                    )
-                                }
-                            )
-                        ))
-                    }
-                )
-            )
-}
-
-lapply(
-    data_list,
-    function(model) {
-        list_clustering <- lapply(
-        seq_along(model), function(s) model[[s]]$list_of_clusterings
-        )
-
-        list_best_partition <- lapply(
-            seq_along(list_clustering), function(s) {
-                list(
-                    epsilon = result_clustering[[s]]$epsilon,
-                    best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
-                )
-            }
-        )
-    data.frame(
-        epsilon = sapply(
-            list_best_partition,
-            function(best_partition) {
-                best_partition$epsilon
-            }
-        ),
-        ARI = sapply(
-            list_best_partition,
-            function(best_partition) {
-                aricode::ARI(rep(1:3, each = 3), unlist(
-                    lapply(
-                        seq_along(best_partition$best_partition),
-                        function(idx) {
-                            setNames(
-                                rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
-                            )
-                        }
-                    )
-                ))
-            }
-        )
-    )
-
-}
-)
-    data.frame(
-        epsilon = sapply(
-            list_best_partition,
-            function(best_partition) {
-                best_partition$epsilon
-            }
-        ),
-        ARI = sapply(
-            list_best_partition,
-            function(best_partition) {
-                aricode::ARI(rep(1:3, each = 3), unlist(
-                    lapply(
-                        seq_along(best_partition$best_partition),
-                        function(idx) {
-                            setNames(
-                                rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
-                            )
-                        }
-                    )
-                ))
-            }
-        )
-    )
+```
+
+```{r netclustering-ARI-boxplot, echo = FALSE}
+#| dpi = 300,
+#| fig.asp = 0.5,
+#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
+df_netclust %>%
+    ggplot() +
+    aes(x = as.factor(epsilon), y = ARI) +
+    scale_color_okabe_ito() +
+    scale_fill_okabe_ito() +
+    xlab(TeX("$\\epsilon$")) +
+    guides(fill = guide_legend(title = "Model")) +
+    ylab("ARI of obtained netclustering") +
+    geom_boxplot(aes(fill = model))
 ```
--- a/Rcodes/simulation/netclustering_analyze.tex
+++ b/Rcodes/simulation/netclustering_analyze.tex
@ -1,13 +1,8 @@
 \section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}

-\begin{verbatim}
-## character(0)
-\end{verbatim}
-
-\begin{verbatim}
-## list()
-\end{verbatim}
-
-\begin{verbatim}
-## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
-\end{verbatim}
+\begin{figure}
+\centering
+\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
+\caption{\label{}ARI of the partition obtained by clustering in function
+of \(\eps\)}
+\end{figure}
--- a/Rcodes/simulation/netclustering_check.R
+++ b/Rcodes/simulation/netclustering_check.R
@ -1,7 +1,8 @@
 require("ggplot2")
 require("tictoc")
+require("colSBM")

-devtools::load_all("R/")
+set.seed(1234)

 # Generation of conditions
 if (!exists("model_to_test")) {
@ -17,7 +18,7 @@ nc <- 75

 pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
 rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
-epsilons <- c(0.4)
+epsilons <- seq(0.1, 0.4, by = 0.1)

 if (!exists("arg")) {
    arg <- commandArgs(trailingOnly = TRUE)
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {

 conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)

-results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
+results <- lapply(seq_len(nrow(conditions)), function(s) {
    eps <- conditions[s, ]$epsilons
    current_pi <- conditions[s, ]$pi
    current_rho <- conditions[s, ]$rho
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_assortative, 3,
-        model = model_to_test
+        model = model_to_test,
+        return_memberships = TRUE
    )

    assortative_incidence <- lapply(
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_core_periphery, 3,
-        model = model_to_test
+        model = model_to_test,
+        return_memberships = TRUE
    )

    core_periphery_incidence <- lapply(
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        nr, nc,
        current_pi, current_rho,
        alpha_disassortative, 3,
-        model = model_to_test
+        model = model_to_test,
+        return_memberships = TRUE
    )

    disassortative_incidence <- lapply(
@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
        colsbm_model = model_to_test,
        global_opts = list(
            nb_cores = parallel::detectCores() - 1, verbosity = 2,
-            plot_details = 0#,
-            #parallelization_vector = c(FALSE, FALSE, FALSE)
+            plot_details = 0 # ,
+            # parallelization_vector = c(FALSE, FALSE, FALSE)
        ),
        silent_parallelization = TRUE
    )
-    toc()
-    return(
-        list(
-        epsilon = eps,
-        repetitions = repetitions,
-        list_of_clusterings = list_collection,
-        real_block_memberships = list(
-            row = real_row_clustering,
-            col = real_col_clustering
+
+    best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
+    clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
+        setNames(
+            rep(col_idx, best_partitions[[col_idx]]$M),
+            best_partitions[[col_idx]]$net_id
        )
-    ))
-},
-mc.cores = parallel::detectCores() - 1,
-mc.progress = TRUE,
-mc.retry = -1
+    }))
+    # ARI computation
+    clustering <- clustering[order(names(clustering))]
+    ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
+
+    toc()
+    cat(paste("Finished", s))
+    return(
+        data.frame(epsilon = eps, model = model_to_test, ARI = ari)
+    )
+}
+# ,
+# mc.cores = parallel::detectCores() - 1,
+# mc.progress = TRUE,
+# mc.retry = -1
 )

-saveRDS(results, file = paste0(
+data_frame_result <- do.call("rbind", results)
+
+saveRDS(data_frame_result, file = paste0(
    "simulation/data/",
-    "simulated_collection_clustering_",
+    "simulated_collection_data_clustering_",
    model_to_test, "_",
    format(Sys.time(), "%d-%m-%y-%X"),
    ".Rds"
--- a/figure/netclustering-ARI-boxplot-1.png
+++ b/figure/netclustering-ARI-boxplot-1.png
--- a/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
+++ b/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
--- a/img/399307245baa20546f626fa1394d31e9a0eb9241.png
+++ b/img/399307245baa20546f626fa1394d31e9a0eb9241.png
--- a/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
+++ b/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
--- a/img/5164fdb9ce3a2342098367d2af256fced27269f5.png
+++ b/img/5164fdb9ce3a2342098367d2af256fced27269f5.png
--- a/img/99d363f6aa43bf0eba413cb994dc00b130709107.png
+++ b/img/99d363f6aa43bf0eba413cb994dc00b130709107.png
--- a/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
+++ b/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
--- a/img/e1edd7a345d455cbbd86f793df106686f91e258b.png
+++ b/img/e1edd7a345d455cbbd86f793df106686f91e258b.png
--- a/rapport.pdf
+++ b/rapport.pdf
--- a/rapport.tex
+++ b/rapport.tex
@ -32,6 +32,9 @@
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
 \usepackage{booktabs}

+% Figure placement
+\floatplacement{figure}{H}
+
 %% Tikz Related
 \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
 \usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
 colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.

 \chapter{Simulation studies}\label{chap:simulation-studies}
+
+The below simulations are meant to test the capacities of our models.
+We assess the inference capacities of the algorithm and method, the model
+selection performances and the clustering capacities.
+
+\paragraph{Reproducibility} All the codes used to obtain data and to perform the
+analysis can be found on the report repository at
+\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
+
 \include{Rcodes/simulation/inference_analyze}
 \include{Rcodes/simulation/model_selection_analyze}
 \include{Rcodes/simulation/netclustering_analyze}