simulations : network clustering mis à jour

This commit is contained in:
Louis Lacoste 2024-07-16 20:12:22 +02:00
parent 5e086c92c3
commit dcacfb1676

View file

@ -1,233 +1,227 @@
necessary_packages <- c("remotes", "colSBM")
if (!(necessary_packages %in% installed.packages())) {
if (!all(necessary_packages %in% installed.packages())) {
install.packages(necessary_packages[-length(necessary_packages)])
remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2")
}
suppressPackageStartupMessages(library("colSBM"))
future::plan(future::multicore())
set.seed(1234)
# Generation of conditions
if (!exists("model_to_test")) {
model_to_test <- "iid"
}
if (!exists("repetitions")) {
repetitions <- seq.int(30)
}
nr <- 75
nc <- 75
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
repetitions <- seq.int(30)
epsilons <- seq(0.1, 0.4, by = 0.1)
models <- c("iid", "pi", "rho", "pirho")
if (!exists("arg")) {
arg <- commandArgs(trailingOnly = TRUE)
}
conditions <- tidyr::crossing(epsilons, repetitions, models)
results <- future.apply::future_lapply(
seq_len(nrow(conditions)), function(s) {
eps <- conditions[s, ]$epsilons
current_pi <- pi
current_rho <- rho
current_model <- conditions[s, ]$models
if (identical(arg, character(0L))) {
model_to_test <- "iid"
} else {
model_to_test <- arg
}
message()
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
alpha_assortative <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
eps, -0.5 * eps, -0.5 * eps,
-0.5 * eps, eps, -0.5 * eps,
-0.5 * eps, -0.5 * eps, eps
),
nrow = 3, byrow = TRUE
)
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
eps <- conditions[s, ]$epsilons
current_pi <- conditions[s, ]$pi
current_rho <- conditions[s, ]$rho
alpha_core_periphery <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
1.5 * eps, eps, 0.5 * eps,
eps, 0.5 * eps, 0,
0.5 * eps, 0, -0.5 * eps
),
nrow = 3, byrow = TRUE
)
alpha_assortative <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
eps, -0.5 * eps, -0.5 * eps,
-0.5 * eps, eps, -0.5 * eps,
-0.5 * eps, -0.5 * eps, eps
alpha_disassortative <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
-0.5 * eps, eps, eps,
eps, -0.5 * eps, eps,
eps, eps, -0.5 * eps
),
nrow = 3, byrow = TRUE
)
assortative_collection <- generate_bipartite_collection(
nr, nc,
current_pi, current_rho,
alpha_assortative, 3,
model = current_model,
return_memberships = TRUE
)
assortative_incidence <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$incidence_matrix)
}
)
assortative_row_clustering <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$row_clustering)
}
)
assortative_col_clustering <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$row_clustering)
}
)
core_periphery_collection <- generate_bipartite_collection(
nr, nc,
current_pi, current_rho,
alpha_core_periphery, 3,
model = current_model,
return_memberships = TRUE
)
core_periphery_incidence <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$incidence_matrix)
}
)
core_periphery_row_clustering <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$row_clustering)
}
)
core_periphery_col_clustering <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$row_clustering)
}
)
disassortative_collection <- generate_bipartite_collection(
nr, nc,
current_pi, current_rho,
alpha_disassortative, 3,
model = current_model,
return_memberships = TRUE
)
disassortative_incidence <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$incidence_matrix)
}
)
disassortative_row_clustering <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$row_clustering)
}
)
disassortative_col_clustering <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$row_clustering)
}
)
real_row_clustering <- append(
append(
assortative_row_clustering,
core_periphery_row_clustering
),
nrow = 3, byrow = TRUE
disassortative_row_clustering
)
alpha_core_periphery <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
1.5 * eps, eps, 0.5 * eps,
eps, 0.5 * eps, 0,
0.5 * eps, 0, -0.5 * eps
real_col_clustering <- append(
append(
assortative_col_clustering,
core_periphery_col_clustering
),
nrow = 3, byrow = TRUE
disassortative_col_clustering
)
alpha_disassortative <- matrix(0.3, nrow = 3, ncol = 3) +
matrix(
c(
-0.5 * eps, eps, eps,
eps, -0.5 * eps, eps,
eps, eps, -0.5 * eps
incidence_matrices <- append(
append(
assortative_incidence,
core_periphery_incidence
),
nrow = 3, byrow = TRUE
disassortative_incidence
)
assortative_collection <- generate_bipartite_collectionge(
nr, nc,
current_pi, current_rho,
alpha_assortative, 3,
model = model_to_test,
return_memberships = TRUE
)
netids <- rep(c("as", "cp", "dis"), each = 3)
assortative_incidence <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$incidence_matrix)
}
)
assortative_row_clustering <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$row_clustering)
}
)
assortative_col_clustering <- lapply(
seq_along(assortative_collection),
function(m) {
return(assortative_collection[[m]]$row_clustering)
}
)
core_periphery_collection <- generate_bipartite_collection(
nr, nc,
current_pi, current_rho,
alpha_core_periphery, 3,
model = model_to_test,
return_memberships = TRUE
)
core_periphery_incidence <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$incidence_matrix)
}
)
core_periphery_row_clustering <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$row_clustering)
}
)
core_periphery_col_clustering <- lapply(
seq_along(core_periphery_collection),
function(m) {
return(core_periphery_collection[[m]]$row_clustering)
}
)
disassortative_collection <- generate_bipartite_collection(
nr, nc,
current_pi, current_rho,
alpha_disassortative, 3,
model = model_to_test,
return_memberships = TRUE
)
disassortative_incidence <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$incidence_matrix)
}
)
disassortative_row_clustering <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$row_clustering)
}
)
disassortative_col_clustering <- lapply(
seq_along(disassortative_collection),
function(m) {
return(disassortative_collection[[m]]$row_clustering)
}
)
real_row_clustering <- append(
append(
assortative_row_clustering,
core_periphery_row_clustering
),
disassortative_row_clustering
)
real_col_clustering <- append(
append(
assortative_col_clustering,
core_periphery_col_clustering
),
disassortative_col_clustering
)
incidence_matrices <- append(
append(
assortative_incidence,
core_periphery_incidence
),
disassortative_incidence
)
netids <- rep(c("as", "cp", "dis"), each = 3)
tictoc::tic()
list_collection <- clusterize_bipartite_networks(
netlist = incidence_matrices,
net_id = netids,
nb_run = 1,
colsbm_model = model_to_test,
global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 2,
plot_details = 0 # ,
# parallelization_vector = c(FALSE, FALSE, FALSE)
),
silent_parallelization = TRUE
)
best_partitions <- unlist( extract_best_partition(list_collection))
if (!is(best_partitions, "list")) {
best_partitions <- list(best_partitions)
}
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
setNames(
rep(col_idx, best_partitions[[col_idx]]$M),
best_partitions[[col_idx]]$net_id
list_collection <- clusterize_bipartite_networks(
netlist = incidence_matrices,
net_id = netids,
nb_run = 3L,
colsbm_model = current_model,
global_opts = list(
nb_cores = parallelly::availableCores(omit = 1L), verbosity = 2,
plot_details = 0 # ,
# parallelization_vector = c(FALSE, FALSE, FALSE)
)
)
}))
# ARI computation
clustering <- clustering[order(names(clustering))]
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
tictoc::toc()
return(
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
)
},
mc.cores = parallel::detectCores() - 1,
mc.progress = TRUE,
mc.retry = -1
best_partitions <- unlist(extract_best_partition(list_collection))
if (!is(best_partitions, "list")) {
best_partitions <- list(best_partitions)
}
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
setNames(
rep(col_idx, best_partitions[[col_idx]]$M),
best_partitions[[col_idx]]$net_id
)
}))
# ARI computation
clustering <- clustering[order(names(clustering))]
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
return(
data.frame(epsilon = eps, model = current_model, ARI = ari)
)
},
future.seed = NULL
)
data_frame_result <- do.call("rbind", results)
saveRDS(data_frame_result, file = paste0(
"simulation/data/",
"simulated_collection_data_clustering_",
model_to_test, "_",
format(Sys.time(), "%d-%m-%y-%X"),
save_folder <- here(
"code", "results", "simulations", "clustering",
"9collection"
)
if (!dir.exists(save_folder)) {
dir.create(save_folder)
}
save_filename <- paste0(
"9collection_data_clustering_",
format(Sys.time(), "%d-%m-%y-%H-%M-%S"),
".Rds"
)
saveRDS(data_frame_result, file = file.path(
save_folder,
save_filename
))