From dcacfb1676d1ac6b35df38a2cb3c72be4ab2845b Mon Sep 17 00:00:00 2001 From: Louis Lacoste Date: Tue, 16 Jul 2024 20:12:22 +0200 Subject: [PATCH] =?UTF-8?q?simulations=20:=20network=20clustering=20mis=20?= =?UTF-8?q?=C3=A0=20jour?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simulations_network_clustering.R | 382 +++++++++--------- 1 file changed, 188 insertions(+), 194 deletions(-) diff --git a/code/simulations/simulations_network_clustering.R b/code/simulations/simulations_network_clustering.R index 8ffe0fc..3de8f35 100644 --- a/code/simulations/simulations_network_clustering.R +++ b/code/simulations/simulations_network_clustering.R @@ -1,233 +1,227 @@ necessary_packages <- c("remotes", "colSBM") -if (!(necessary_packages %in% installed.packages())) { +if (!all(necessary_packages %in% installed.packages())) { install.packages(necessary_packages[-length(necessary_packages)]) remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2") } suppressPackageStartupMessages(library("colSBM")) +future::plan(future::multicore()) set.seed(1234) -# Generation of conditions -if (!exists("model_to_test")) { - model_to_test <- "iid" -} - -if (!exists("repetitions")) { - repetitions <- seq.int(30) -} nr <- 75 nc <- 75 pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) +repetitions <- seq.int(30) epsilons <- seq(0.1, 0.4, by = 0.1) +models <- c("iid", "pi", "rho", "pirho") -if (!exists("arg")) { - arg <- commandArgs(trailingOnly = TRUE) -} +conditions <- tidyr::crossing(epsilons, repetitions, models) +results <- future.apply::future_lapply( + seq_len(nrow(conditions)), function(s) { + eps <- conditions[s, ]$epsilons + current_pi <- pi + current_rho <- rho + current_model <- conditions[s, ]$models -if (identical(arg, character(0L))) { - model_to_test <- "iid" -} else { - model_to_test <- arg -} + message() -conditions <- tidyr::crossing(epsilons, pi, rho, repetitions) + alpha_assortative <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + eps, -0.5 * eps, -0.5 * eps, + -0.5 * eps, eps, -0.5 * eps, + -0.5 * eps, -0.5 * eps, eps + ), + nrow = 3, byrow = TRUE + ) -results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { - eps <- conditions[s, ]$epsilons - current_pi <- conditions[s, ]$pi - current_rho <- conditions[s, ]$rho + alpha_core_periphery <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + 1.5 * eps, eps, 0.5 * eps, + eps, 0.5 * eps, 0, + 0.5 * eps, 0, -0.5 * eps + ), + nrow = 3, byrow = TRUE + ) - alpha_assortative <- matrix(0.3, nrow = 3, ncol = 3) + - matrix( - c( - eps, -0.5 * eps, -0.5 * eps, - -0.5 * eps, eps, -0.5 * eps, - -0.5 * eps, -0.5 * eps, eps + alpha_disassortative <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + -0.5 * eps, eps, eps, + eps, -0.5 * eps, eps, + eps, eps, -0.5 * eps + ), + nrow = 3, byrow = TRUE + ) + + assortative_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_assortative, 3, + model = current_model, + return_memberships = TRUE + ) + + assortative_incidence <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$incidence_matrix) + } + ) + + assortative_row_clustering <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$row_clustering) + } + ) + + assortative_col_clustering <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$row_clustering) + } + ) + + core_periphery_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_core_periphery, 3, + model = current_model, + return_memberships = TRUE + ) + + core_periphery_incidence <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$incidence_matrix) + } + ) + + core_periphery_row_clustering <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$row_clustering) + } + ) + + core_periphery_col_clustering <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$row_clustering) + } + ) + + disassortative_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_disassortative, 3, + model = current_model, + return_memberships = TRUE + ) + + disassortative_incidence <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$incidence_matrix) + } + ) + + disassortative_row_clustering <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$row_clustering) + } + ) + + disassortative_col_clustering <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$row_clustering) + } + ) + + real_row_clustering <- append( + append( + assortative_row_clustering, + core_periphery_row_clustering ), - nrow = 3, byrow = TRUE + disassortative_row_clustering ) - alpha_core_periphery <- matrix(0.3, nrow = 3, ncol = 3) + - matrix( - c( - 1.5 * eps, eps, 0.5 * eps, - eps, 0.5 * eps, 0, - 0.5 * eps, 0, -0.5 * eps + real_col_clustering <- append( + append( + assortative_col_clustering, + core_periphery_col_clustering ), - nrow = 3, byrow = TRUE + disassortative_col_clustering ) - alpha_disassortative <- matrix(0.3, nrow = 3, ncol = 3) + - matrix( - c( - -0.5 * eps, eps, eps, - eps, -0.5 * eps, eps, - eps, eps, -0.5 * eps + incidence_matrices <- append( + append( + assortative_incidence, + core_periphery_incidence ), - nrow = 3, byrow = TRUE + disassortative_incidence ) - assortative_collection <- generate_bipartite_collectionge( - nr, nc, - current_pi, current_rho, - alpha_assortative, 3, - model = model_to_test, - return_memberships = TRUE - ) + netids <- rep(c("as", "cp", "dis"), each = 3) - assortative_incidence <- lapply( - seq_along(assortative_collection), - function(m) { - return(assortative_collection[[m]]$incidence_matrix) - } - ) - - assortative_row_clustering <- lapply( - seq_along(assortative_collection), - function(m) { - return(assortative_collection[[m]]$row_clustering) - } - ) - - assortative_col_clustering <- lapply( - seq_along(assortative_collection), - function(m) { - return(assortative_collection[[m]]$row_clustering) - } - ) - - core_periphery_collection <- generate_bipartite_collection( - nr, nc, - current_pi, current_rho, - alpha_core_periphery, 3, - model = model_to_test, - return_memberships = TRUE - ) - - core_periphery_incidence <- lapply( - seq_along(core_periphery_collection), - function(m) { - return(core_periphery_collection[[m]]$incidence_matrix) - } - ) - - core_periphery_row_clustering <- lapply( - seq_along(core_periphery_collection), - function(m) { - return(core_periphery_collection[[m]]$row_clustering) - } - ) - - core_periphery_col_clustering <- lapply( - seq_along(core_periphery_collection), - function(m) { - return(core_periphery_collection[[m]]$row_clustering) - } - ) - - disassortative_collection <- generate_bipartite_collection( - nr, nc, - current_pi, current_rho, - alpha_disassortative, 3, - model = model_to_test, - return_memberships = TRUE - ) - - disassortative_incidence <- lapply( - seq_along(disassortative_collection), - function(m) { - return(disassortative_collection[[m]]$incidence_matrix) - } - ) - - disassortative_row_clustering <- lapply( - seq_along(disassortative_collection), - function(m) { - return(disassortative_collection[[m]]$row_clustering) - } - ) - - disassortative_col_clustering <- lapply( - seq_along(disassortative_collection), - function(m) { - return(disassortative_collection[[m]]$row_clustering) - } - ) - - real_row_clustering <- append( - append( - assortative_row_clustering, - core_periphery_row_clustering - ), - disassortative_row_clustering - ) - - real_col_clustering <- append( - append( - assortative_col_clustering, - core_periphery_col_clustering - ), - disassortative_col_clustering - ) - - incidence_matrices <- append( - append( - assortative_incidence, - core_periphery_incidence - ), - disassortative_incidence - ) - - netids <- rep(c("as", "cp", "dis"), each = 3) - - tictoc::tic() - list_collection <- clusterize_bipartite_networks( - netlist = incidence_matrices, - net_id = netids, - nb_run = 1, - colsbm_model = model_to_test, - global_opts = list( - nb_cores = parallel::detectCores() - 1, verbosity = 2, - plot_details = 0 # , - # parallelization_vector = c(FALSE, FALSE, FALSE) - ), - silent_parallelization = TRUE - ) - - best_partitions <- unlist( extract_best_partition(list_collection)) - if (!is(best_partitions, "list")) { - best_partitions <- list(best_partitions) - } - clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) { - setNames( - rep(col_idx, best_partitions[[col_idx]]$M), - best_partitions[[col_idx]]$net_id + list_collection <- clusterize_bipartite_networks( + netlist = incidence_matrices, + net_id = netids, + nb_run = 3L, + colsbm_model = current_model, + global_opts = list( + nb_cores = parallelly::availableCores(omit = 1L), verbosity = 2, + plot_details = 0 # , + # parallelization_vector = c(FALSE, FALSE, FALSE) + ) ) - })) - # ARI computation - clustering <- clustering[order(names(clustering))] - ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering) - tictoc::toc() - return( - data.frame(epsilon = eps, model = model_to_test, ARI = ari) - ) -}, -mc.cores = parallel::detectCores() - 1, -mc.progress = TRUE, -mc.retry = -1 + best_partitions <- unlist(extract_best_partition(list_collection)) + if (!is(best_partitions, "list")) { + best_partitions <- list(best_partitions) + } + clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) { + setNames( + rep(col_idx, best_partitions[[col_idx]]$M), + best_partitions[[col_idx]]$net_id + ) + })) + # ARI computation + clustering <- clustering[order(names(clustering))] + ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering) + + return( + data.frame(epsilon = eps, model = current_model, ARI = ari) + ) + }, + future.seed = NULL ) data_frame_result <- do.call("rbind", results) -saveRDS(data_frame_result, file = paste0( - "simulation/data/", - "simulated_collection_data_clustering_", - model_to_test, "_", - format(Sys.time(), "%d-%m-%y-%X"), +save_folder <- here( + "code", "results", "simulations", "clustering", + "9collection" +) + +if (!dir.exists(save_folder)) { + dir.create(save_folder) +} +save_filename <- paste0( + "9collection_data_clustering_", + format(Sys.time(), "%d-%m-%y-%H-%M-%S"), ".Rds" +) + +saveRDS(data_frame_result, file = file.path( + save_folder, + save_filename ))