diff --git a/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds b/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds deleted file mode 100644 index 417a5dc..0000000 Binary files a/Rcodes/simulation/data/simulated_collection_clustering_iid_10-05-23-14:39:16.Rds and /dev/null differ diff --git a/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds b/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds deleted file mode 100644 index b3d361b..0000000 Binary files a/Rcodes/simulation/data/simulated_collection_clustering_pi_10-05-23-14:47:49.Rds and /dev/null differ diff --git a/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds b/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds deleted file mode 100644 index 806a682..0000000 Binary files a/Rcodes/simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds and /dev/null differ diff --git a/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds b/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds new file mode 100644 index 0000000..38c4c63 Binary files /dev/null and b/Rcodes/simulation/data/simulated_collection_data_clustering_iid_08-07-23-16:51:52.Rds differ diff --git a/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds b/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds new file mode 100644 index 0000000..8e91546 Binary files /dev/null and b/Rcodes/simulation/data/simulated_collection_data_clustering_pirho_08-07-23-17:04:36.Rds differ diff --git a/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds b/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds new file mode 100644 index 0000000..4a94c99 Binary files /dev/null and b/Rcodes/simulation/data/simulated_collection_data_clustering_rho_08-07-23-16:58:41.Rds differ diff --git a/Rcodes/simulation/netclustering_analyze.Rmd b/Rcodes/simulation/netclustering_analyze.Rmd index 54e08fa..ba0c0be 100644 --- a/Rcodes/simulation/netclustering_analyze.Rmd +++ b/Rcodes/simulation/netclustering_analyze.Rmd @@ -1,5 +1,6 @@ ```{r libraries, echo = FALSE, include = FALSE} require("ggplot2") +require("ggokabeito") require("tidyr") require("dplyr") require("patchwork") @@ -10,119 +11,27 @@ require("latex2exp") ```{r impoting-data, echo = FALSE} filenames <- list.files( - path = "./Rcodes/simulation/data", - pattern = "simulated_collection_clustering_*", + path = "./data", + pattern = "simulated_collection_data_clustering_*", full.names = TRUE ) -rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6) - # data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering)) -data_list <- lapply(filenames, readRDS) +df_netclust <- do.call("rbind", lapply(filenames, readRDS)) +``` -for (model in data_list) { - list_clustering <- lapply( - seq_along(model), function(s) model[[s]]$list_of_clusterings - ) - - list_best_partition <- lapply( - seq_along(list_clustering), function(s) { - list( - epsilon = result_clustering[[s]]$epsilon, - best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]])) - ) - } - ) - data.frame( - epsilon = sapply( - list_best_partition, - function(best_partition) { - best_partition$epsilon - } - ), - ARI = sapply( - list_best_partition, - function(best_partition) { - aricode::ARI(rep(1:3, each = 3), unlist( - lapply( - seq_along(best_partition$best_partition), - function(idx) { - setNames( - rep(idx, - best_partition$best_partition[[idx]]$M), - best_partition$best_partition[[idx]]$net_id - ) - } - ) - )) - } - ) - ) -} - -lapply( - data_list, - function(model) { - list_clustering <- lapply( - seq_along(model), function(s) model[[s]]$list_of_clusterings - ) - - list_best_partition <- lapply( - seq_along(list_clustering), function(s) { - list( - epsilon = result_clustering[[s]]$epsilon, - best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]])) - ) - } - ) - data.frame( - epsilon = sapply( - list_best_partition, - function(best_partition) { - best_partition$epsilon - } - ), - ARI = sapply( - list_best_partition, - function(best_partition) { - aricode::ARI(rep(1:3, each = 3), unlist( - lapply( - seq_along(best_partition$best_partition), - function(idx) { - setNames( - rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id - ) - } - ) - )) - } - ) - ) - -} -) - data.frame( - epsilon = sapply( - list_best_partition, - function(best_partition) { - best_partition$epsilon - } - ), - ARI = sapply( - list_best_partition, - function(best_partition) { - aricode::ARI(rep(1:3, each = 3), unlist( - lapply( - seq_along(best_partition$best_partition), - function(idx) { - setNames( - rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id - ) - } - ) - )) - } - ) - ) +```{r netclustering-ARI-boxplot, echo = FALSE} +#| dpi = 300, +#| fig.asp = 0.5, +#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$" +df_netclust %>% + ggplot() + + aes(x = as.factor(epsilon), y = ARI) + + scale_color_okabe_ito() + + scale_fill_okabe_ito() + + xlab(TeX("$\\epsilon$")) + + guides(fill = guide_legend(title = "Model")) + + ylab("ARI of obtained netclustering") + + geom_boxplot(aes(fill = model)) ``` \ No newline at end of file diff --git a/Rcodes/simulation/netclustering_analyze.tex b/Rcodes/simulation/netclustering_analyze.tex index b3e5948..2c690d6 100644 --- a/Rcodes/simulation/netclustering_analyze.tex +++ b/Rcodes/simulation/netclustering_analyze.tex @@ -1,13 +1,8 @@ \section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks} -\begin{verbatim} -## character(0) -\end{verbatim} - -\begin{verbatim} -## list() -\end{verbatim} - -\begin{verbatim} -## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable -\end{verbatim} +\begin{figure} +\centering +\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png} +\caption{\label{}ARI of the partition obtained by clustering in function +of \(\eps\)} +\end{figure} diff --git a/Rcodes/simulation/netclustering_check.R b/Rcodes/simulation/netclustering_check.R index 9422528..1b027bc 100644 --- a/Rcodes/simulation/netclustering_check.R +++ b/Rcodes/simulation/netclustering_check.R @@ -1,7 +1,8 @@ require("ggplot2") require("tictoc") +require("colSBM") -devtools::load_all("R/") +set.seed(1234) # Generation of conditions if (!exists("model_to_test")) { @@ -17,7 +18,7 @@ nc <- 75 pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) -epsilons <- c(0.4) +epsilons <- seq(0.1, 0.4, by = 0.1) if (!exists("arg")) { arg <- commandArgs(trailingOnly = TRUE) @@ -31,7 +32,7 @@ if (identical(arg, character(0))) { conditions <- tidyr::crossing(epsilons, pi, rho, repetitions) -results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { +results <- lapply(seq_len(nrow(conditions)), function(s) { eps <- conditions[s, ]$epsilons current_pi <- conditions[s, ]$pi current_rho <- conditions[s, ]$rho @@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { nr, nc, current_pi, current_rho, alpha_assortative, 3, - model = model_to_test + model = model_to_test, + return_memberships = TRUE ) assortative_incidence <- lapply( @@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { nr, nc, current_pi, current_rho, alpha_core_periphery, 3, - model = model_to_test + model = model_to_test, + return_memberships = TRUE ) core_periphery_incidence <- lapply( @@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { nr, nc, current_pi, current_rho, alpha_disassortative, 3, - model = model_to_test + model = model_to_test, + return_memberships = TRUE ) disassortative_incidence <- lapply( @@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { colsbm_model = model_to_test, global_opts = list( nb_cores = parallel::detectCores() - 1, verbosity = 2, - plot_details = 0#, - #parallelization_vector = c(FALSE, FALSE, FALSE) + plot_details = 0 # , + # parallelization_vector = c(FALSE, FALSE, FALSE) ), silent_parallelization = TRUE ) - toc() - return( - list( - epsilon = eps, - repetitions = repetitions, - list_of_clusterings = list_collection, - real_block_memberships = list( - row = real_row_clustering, - col = real_col_clustering + + best_partitions <- unlist(extract_best_bipartite_partition(list_collection)) + clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) { + setNames( + rep(col_idx, best_partitions[[col_idx]]$M), + best_partitions[[col_idx]]$net_id ) - )) -}, -mc.cores = parallel::detectCores() - 1, -mc.progress = TRUE, -mc.retry = -1 + })) + # ARI computation + clustering <- clustering[order(names(clustering))] + ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering) + + toc() + cat(paste("Finished", s)) + return( + data.frame(epsilon = eps, model = model_to_test, ARI = ari) + ) +} +# , +# mc.cores = parallel::detectCores() - 1, +# mc.progress = TRUE, +# mc.retry = -1 ) -saveRDS(results, file = paste0( +data_frame_result <- do.call("rbind", results) + +saveRDS(data_frame_result, file = paste0( "simulation/data/", - "simulated_collection_clustering_", + "simulated_collection_data_clustering_", model_to_test, "_", format(Sys.time(), "%d-%m-%y-%X"), ".Rds" diff --git a/figure/netclustering-ARI-boxplot-1.png b/figure/netclustering-ARI-boxplot-1.png new file mode 100644 index 0000000..3709395 Binary files /dev/null and b/figure/netclustering-ARI-boxplot-1.png differ diff --git a/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png b/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png new file mode 100644 index 0000000..8c0788a Binary files /dev/null and b/img/353fe7be5055ae41407f1c89793ea74014c17c8b.png differ diff --git a/img/399307245baa20546f626fa1394d31e9a0eb9241.png b/img/399307245baa20546f626fa1394d31e9a0eb9241.png new file mode 100644 index 0000000..8a9e96e Binary files /dev/null and b/img/399307245baa20546f626fa1394d31e9a0eb9241.png differ diff --git a/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png b/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png new file mode 100644 index 0000000..3118a67 Binary files /dev/null and b/img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png differ diff --git a/img/5164fdb9ce3a2342098367d2af256fced27269f5.png b/img/5164fdb9ce3a2342098367d2af256fced27269f5.png new file mode 100644 index 0000000..9bab896 Binary files /dev/null and b/img/5164fdb9ce3a2342098367d2af256fced27269f5.png differ diff --git a/img/99d363f6aa43bf0eba413cb994dc00b130709107.png b/img/99d363f6aa43bf0eba413cb994dc00b130709107.png new file mode 100644 index 0000000..3709395 Binary files /dev/null and b/img/99d363f6aa43bf0eba413cb994dc00b130709107.png differ diff --git a/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png b/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png new file mode 100644 index 0000000..6a2e7b2 Binary files /dev/null and b/img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png differ diff --git a/img/e1edd7a345d455cbbd86f793df106686f91e258b.png b/img/e1edd7a345d455cbbd86f793df106686f91e258b.png new file mode 100644 index 0000000..5abeaec Binary files /dev/null and b/img/e1edd7a345d455cbbd86f793df106686f91e258b.png differ diff --git a/rapport.pdf b/rapport.pdf index bf1389f..0afcc03 100644 Binary files a/rapport.pdf and b/rapport.pdf differ diff --git a/rapport.tex b/rapport.tex index e623056..49cab1e 100644 --- a/rapport.tex +++ b/rapport.tex @@ -32,6 +32,9 @@ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} \usepackage{booktabs} +% Figure placement +\floatplacement{figure}{H} + %% Tikz Related \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning} \usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning} @@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}. \chapter{Simulation studies}\label{chap:simulation-studies} + +The below simulations are meant to test the capacities of our models. +We assess the inference capacities of the algorithm and method, the model +selection performances and the clustering capacities. + +\paragraph{Reproducibility} All the codes used to obtain data and to perform the +analysis can be found on the report repository at +\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}. + \include{Rcodes/simulation/inference_analyze} \include{Rcodes/simulation/model_selection_analyze} \include{Rcodes/simulation/netclustering_analyze}