Synthetic clustering ready to launch

🚧 Profiling ready
🧯Removing .out files from .gitignore missed regex
This commit is contained in:
Louis Lacoste 2024-06-17 11:56:34 +02:00
parent e9607d18af
commit ec197a535d
5 changed files with 9132 additions and 0 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
library(colSBM)
library(dplyr)
library(tidyr)
library(here)
library(future)
library(future.apply)
library(profvis)
plan(multicore)
# Constants
max_vem_steps <- 500L
application_name <- "synthetic"
data_folder <- file.path(
here(), "code", "data", "simulations", "clustering",
application_name
)
# Arguments
arg <- commandArgs(trailingOnly = TRUE)
model <- "iid"
seed <- 1234L
eps <- 0.4
if (length(arg) == 0L) {
message("No arguments provided, using default.")
} else {
if ("--model" %in% arg) {
model <- arg[(which(arg == "--model") + 1L)]
} else {
message("No model provided, defaulting to iid.")
}
if ("--seed" %in% arg) {
seed <- try(as.integer(arg[(which(arg == "--seed") + 1L)]))
} else {
message("No seed provided, defaulting to 1234.")
}
if ("--eps" %in% arg) {
eps <- try(as.integer(arg[(which(arg == "--eps") + 1L)]))
} else {
message("No eps provided, defaulting to 0.4.")
}
}
# Arguments checks
allowed_model <- c("iid", "pi", "rho", "pirho")
stopifnot(
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
"Seed isn't castable to integer" = (is.integer(seed))
)
message(
sprintf("Début du clustering des données %s avec le modèle ", application_name), model,
" et la seed ", seed
)
set.seed(seed, "L'Ecuyer-CMRG")
save_folder <- file.path(
here(), "code", "results",
"simulations", "clustering",
application_name
)
if (!dir.exists(save_folder)) {
dir.create(save_folder, recursive = TRUE)
}
source(here("code", "simulations", "clustering", "synthetic_generate_iid.R"))
netlist <- generate_synth_col(eps)
start_time <- format(Sys.time(), "%d-%m-%y_%H-%M-%S")
profvis(
{
list_collection <- colSBM::clusterize_bipartite_networks(
netlist = netlist,
net_id = names(netlist),
colsbm_model = "iid",
global_opts = list(
verbosity = 3L,
plot_details = 0L,
backend = "future",
nb_cores = parallelly::availableCores(omit = 1L)
),
fit_opts = list(
max_vem_steps = max_vem_steps
)
)
},
prof_output = file.path(save_folder, paste0("profiling_", start_time, ".out"))
)
save_file <- file.path(
save_folder, paste0(
sprintf("%s_collection_", application_name),
model, "_", start_time, "_maxsteps_500.Rds"
)
)
message("Clustering saved.")
saveRDS(list_collection, file = save_file)

View file

@ -0,0 +1,63 @@
# Should not be used standalone !
base_alpha <- matrix(rep(0.3, 9L), nrow = 3L)
pi <- c(0.3, 0.2, 0.5)
rho <- c(0.55, 0.15, 0.3)
M <- 40L
nr <- c(rep(30L, M / 2L), rep(95L, M / 2L))
nc <- c(rep(40L, M / 2L), rep(70L, M / 2L))
message("Génération de la collection synthétique avec la seed ", seed)
generate_synth_col <- function(eps) {
as_alpha <- base_alpha + matrix(
c(
eps, -eps / 2L, -eps / 2L,
-eps / 2L, eps, -eps / 2L,
-eps / 2L, -eps / 2L, eps
),
nrow = 3L
)
cp_alpha <- base_alpha + matrix(
c(
3L * eps / 2L, eps, eps / 2L,
eps, eps / 2L, 0L,
eps / 2L, 0L, -eps / 2L
),
nrow = 3L
)
dis_alpha <- base_alpha + matrix(
c(
-eps / 2L, eps, eps,
eps, -eps / 2L, eps,
eps, eps, -eps / 2L
),
nrow = 3L
)
collection <- c(
generate_bipartite_collection(
nr = nr, nc = nc,
pi = pi, rho = rho,
alpha = as_alpha, M = M
),
generate_bipartite_collection(
nr = nr, nc = nc,
pi = pi, rho = rho,
alpha = cp_alpha, M = M
),
generate_bipartite_collection(
nr = nr, nc = nc,
pi = pi, rho = rho,
alpha = dis_alpha, M = M
)
)
names(collection) <- c(
rep("assortative_small", M / 2), rep("assortative", M / 2),
rep("core_periphery_small", M / 2), rep("core_periphery", M / 2),
rep("disassortative_small", M / 2), rep("disassortative", M / 2)
)
collection
}