From 4259dd53be14b6c3cc35e677a703d233f42e079f Mon Sep 17 00:00:00 2001 From: Louis Lacoste Date: Wed, 11 Sep 2024 12:07:32 +0200 Subject: [PATCH] simulations: adding large network clustering with 90 networks collections --- .../simulations_large_network_clustering.R | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 code/simulations/simulations_large_network_clustering.R diff --git a/code/simulations/simulations_large_network_clustering.R b/code/simulations/simulations_large_network_clustering.R new file mode 100644 index 0000000..a81a221 --- /dev/null +++ b/code/simulations/simulations_large_network_clustering.R @@ -0,0 +1,241 @@ +necessary_packages <- c("remotes", "colSBM") +library(here) + +options(future.globals.maxSize = Inf) + +if (!all(necessary_packages %in% installed.packages())) { + install.packages(necessary_packages[-length(necessary_packages)]) + remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2") +} +suppressPackageStartupMessages(library("colSBM")) +future::plan(future::multisession(workers = availableWorkers())) + +set.seed(0L) + +M <- 90 + +nr <- 75 +nc <- 75 + +pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) +rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) +repetitions <- seq.int(30) +epsilons <- seq(0.1, 0.4, by = 0.1) +models <- c("iid", "pi", "rho", "pirho") + +save_folder <- here( + "code", "results", "simulations", "clustering", + "90collection" +) + +if (!dir.exists(save_folder)) { + dir.create(save_folder) +} + +save_filename <- paste0( + "90collection_data_clustering_", + format(Sys.time(), "%d-%m-%y-%H-%M-%S"), + ".Rds" +) + +temp_folder <- file.path(save_folder, paste0("tmp", format(Sys.time(), "%d-%m-%y-%H-%M-%S"))) + +if (!dir.exists(temp_folder)) { + dir.create(temp_folder) +} + +conditions <- tidyr::crossing(epsilons, repetitions, models) +results <- future.apply::future_lapply( + seq_len(nrow(conditions)), function(s) { + eps <- conditions[s, ]$epsilons + current_pi <- pi + current_rho <- rho + current_model <- conditions[s, ]$models + + message() + + alpha_assortative <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + eps, -0.5 * eps, -0.5 * eps, + -0.5 * eps, eps, -0.5 * eps, + -0.5 * eps, -0.5 * eps, eps + ), + nrow = 3, byrow = TRUE + ) + + alpha_core_periphery <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + 1.5 * eps, eps, 0.5 * eps, + eps, 0.5 * eps, 0, + 0.5 * eps, 0, -0.5 * eps + ), + nrow = 3, byrow = TRUE + ) + + alpha_disassortative <- matrix(0.3, nrow = 3, ncol = 3) + + matrix( + c( + -0.5 * eps, eps, eps, + eps, -0.5 * eps, eps, + eps, eps, -0.5 * eps + ), + nrow = 3, byrow = TRUE + ) + + assortative_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_assortative, M / 3, + model = current_model, + return_memberships = TRUE + ) + + assortative_incidence <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$incidence_matrix) + } + ) + + assortative_row_clustering <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$row_clustering) + } + ) + + assortative_col_clustering <- lapply( + seq_along(assortative_collection), + function(m) { + return(assortative_collection[[m]]$row_clustering) + } + ) + + core_periphery_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_core_periphery, M / 3, + model = current_model, + return_memberships = TRUE + ) + + core_periphery_incidence <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$incidence_matrix) + } + ) + + core_periphery_row_clustering <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$row_clustering) + } + ) + + core_periphery_col_clustering <- lapply( + seq_along(core_periphery_collection), + function(m) { + return(core_periphery_collection[[m]]$row_clustering) + } + ) + + disassortative_collection <- generate_bipartite_collection( + nr, nc, + current_pi, current_rho, + alpha_disassortative, M / 3, + model = current_model, + return_memberships = TRUE + ) + + disassortative_incidence <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$incidence_matrix) + } + ) + + disassortative_row_clustering <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$row_clustering) + } + ) + + disassortative_col_clustering <- lapply( + seq_along(disassortative_collection), + function(m) { + return(disassortative_collection[[m]]$row_clustering) + } + ) + + real_row_clustering <- append( + append( + assortative_row_clustering, + core_periphery_row_clustering + ), + disassortative_row_clustering + ) + + real_col_clustering <- append( + append( + assortative_col_clustering, + core_periphery_col_clustering + ), + disassortative_col_clustering + ) + + incidence_matrices <- append( + append( + assortative_incidence, + core_periphery_incidence + ), + disassortative_incidence + ) + + netids <- rep(c("as", "cp", "dis"), each = M / 3) + + list_collection <- clusterize_bipartite_networks( + netlist = incidence_matrices, + net_id = netids, + nb_run = 3L, + colsbm_model = current_model, + global_opts = list( + nb_cores = parallelly::availableCores(omit = 1L), verbosity = 2, + plot_details = 0 # , + ) + ) + + best_partitions <- unlist(extract_best_partition(list_collection)) + if (!is(best_partitions, "list")) { + best_partitions <- list(best_partitions) + } + clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) { + setNames( + rep(col_idx, best_partitions[[col_idx]]$M), + best_partitions[[col_idx]]$net_id + ) + })) + # ARI computation + clustering <- clustering[order(names(clustering))] + ari <- aricode::ARI(rep(c(1, 2, 3), each = M / 3), clustering) + out <- data.frame(epsilon = eps, model = current_model, ARI = ari) + + saveRDS(out, file = file.path( + temp_folder, + paste0("condition_", s, "_on_", nrow(conditions), ".Rds") + )) + + return(out) + }, + future.seed = NULL +) + +data_frame_result <- do.call("rbind", results) + +saveRDS(data_frame_result, file = file.path( + save_folder, + save_filename +))