Synthetic clustering ready to launch
🚧 Profiling ready 🧯Removing .out files from .gitignore missed regex
This commit is contained in:
parent
e9607d18af
commit
ec197a535d
5 changed files with 9132 additions and 0 deletions
Binary file not shown.
6026
code/results/investigating/profiling/no-parallel-asis.out
Normal file
6026
code/results/investigating/profiling/no-parallel-asis.out
Normal file
File diff suppressed because it is too large
Load diff
2939
code/results/investigating/profiling/parallel-asis.out
Normal file
2939
code/results/investigating/profiling/parallel-asis.out
Normal file
File diff suppressed because it is too large
Load diff
104
code/simulations/clustering/synthetic_clustering.R
Normal file
104
code/simulations/clustering/synthetic_clustering.R
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
library(colSBM)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(here)
|
||||
library(future)
|
||||
library(future.apply)
|
||||
library(profvis)
|
||||
|
||||
plan(multicore)
|
||||
|
||||
# Constants
|
||||
max_vem_steps <- 500L
|
||||
|
||||
application_name <- "synthetic"
|
||||
|
||||
data_folder <- file.path(
|
||||
here(), "code", "data", "simulations", "clustering",
|
||||
application_name
|
||||
)
|
||||
|
||||
# Arguments
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
model <- "iid"
|
||||
seed <- 1234L
|
||||
eps <- 0.4
|
||||
|
||||
if (length(arg) == 0L) {
|
||||
message("No arguments provided, using default.")
|
||||
} else {
|
||||
if ("--model" %in% arg) {
|
||||
model <- arg[(which(arg == "--model") + 1L)]
|
||||
} else {
|
||||
message("No model provided, defaulting to iid.")
|
||||
}
|
||||
if ("--seed" %in% arg) {
|
||||
seed <- try(as.integer(arg[(which(arg == "--seed") + 1L)]))
|
||||
} else {
|
||||
message("No seed provided, defaulting to 1234.")
|
||||
}
|
||||
if ("--eps" %in% arg) {
|
||||
eps <- try(as.integer(arg[(which(arg == "--eps") + 1L)]))
|
||||
} else {
|
||||
message("No eps provided, defaulting to 0.4.")
|
||||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
allowed_model <- c("iid", "pi", "rho", "pirho")
|
||||
stopifnot(
|
||||
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
|
||||
"Seed isn't castable to integer" = (is.integer(seed))
|
||||
)
|
||||
|
||||
message(
|
||||
sprintf("Début du clustering des données %s avec le modèle ", application_name), model,
|
||||
" et la seed ", seed
|
||||
)
|
||||
|
||||
set.seed(seed, "L'Ecuyer-CMRG")
|
||||
save_folder <- file.path(
|
||||
here(), "code", "results",
|
||||
"simulations", "clustering",
|
||||
application_name
|
||||
)
|
||||
|
||||
if (!dir.exists(save_folder)) {
|
||||
dir.create(save_folder, recursive = TRUE)
|
||||
}
|
||||
|
||||
source(here("code", "simulations", "clustering", "synthetic_generate_iid.R"))
|
||||
|
||||
netlist <- generate_synth_col(eps)
|
||||
|
||||
start_time <- format(Sys.time(), "%d-%m-%y_%H-%M-%S")
|
||||
|
||||
profvis(
|
||||
{
|
||||
list_collection <- colSBM::clusterize_bipartite_networks(
|
||||
netlist = netlist,
|
||||
net_id = names(netlist),
|
||||
colsbm_model = "iid",
|
||||
global_opts = list(
|
||||
verbosity = 3L,
|
||||
plot_details = 0L,
|
||||
backend = "future",
|
||||
nb_cores = parallelly::availableCores(omit = 1L)
|
||||
),
|
||||
fit_opts = list(
|
||||
max_vem_steps = max_vem_steps
|
||||
)
|
||||
)
|
||||
},
|
||||
prof_output = file.path(save_folder, paste0("profiling_", start_time, ".out"))
|
||||
)
|
||||
save_file <- file.path(
|
||||
save_folder, paste0(
|
||||
sprintf("%s_collection_", application_name),
|
||||
model, "_", start_time, "_maxsteps_500.Rds"
|
||||
)
|
||||
)
|
||||
|
||||
message("Clustering saved.")
|
||||
saveRDS(list_collection, file = save_file)
|
||||
63
code/simulations/clustering/synthetic_generate_iid.R
Normal file
63
code/simulations/clustering/synthetic_generate_iid.R
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
# Should not be used standalone !
|
||||
|
||||
base_alpha <- matrix(rep(0.3, 9L), nrow = 3L)
|
||||
pi <- c(0.3, 0.2, 0.5)
|
||||
rho <- c(0.55, 0.15, 0.3)
|
||||
M <- 40L
|
||||
nr <- c(rep(30L, M / 2L), rep(95L, M / 2L))
|
||||
nc <- c(rep(40L, M / 2L), rep(70L, M / 2L))
|
||||
|
||||
message("Génération de la collection synthétique avec la seed ", seed)
|
||||
|
||||
generate_synth_col <- function(eps) {
|
||||
as_alpha <- base_alpha + matrix(
|
||||
c(
|
||||
eps, -eps / 2L, -eps / 2L,
|
||||
-eps / 2L, eps, -eps / 2L,
|
||||
-eps / 2L, -eps / 2L, eps
|
||||
),
|
||||
nrow = 3L
|
||||
)
|
||||
|
||||
cp_alpha <- base_alpha + matrix(
|
||||
c(
|
||||
3L * eps / 2L, eps, eps / 2L,
|
||||
eps, eps / 2L, 0L,
|
||||
eps / 2L, 0L, -eps / 2L
|
||||
),
|
||||
nrow = 3L
|
||||
)
|
||||
|
||||
dis_alpha <- base_alpha + matrix(
|
||||
c(
|
||||
-eps / 2L, eps, eps,
|
||||
eps, -eps / 2L, eps,
|
||||
eps, eps, -eps / 2L
|
||||
),
|
||||
nrow = 3L
|
||||
)
|
||||
|
||||
collection <- c(
|
||||
generate_bipartite_collection(
|
||||
nr = nr, nc = nc,
|
||||
pi = pi, rho = rho,
|
||||
alpha = as_alpha, M = M
|
||||
),
|
||||
generate_bipartite_collection(
|
||||
nr = nr, nc = nc,
|
||||
pi = pi, rho = rho,
|
||||
alpha = cp_alpha, M = M
|
||||
),
|
||||
generate_bipartite_collection(
|
||||
nr = nr, nc = nc,
|
||||
pi = pi, rho = rho,
|
||||
alpha = dis_alpha, M = M
|
||||
)
|
||||
)
|
||||
names(collection) <- c(
|
||||
rep("assortative_small", M / 2), rep("assortative", M / 2),
|
||||
rep("core_periphery_small", M / 2), rep("core_periphery", M / 2),
|
||||
rep("disassortative_small", M / 2), rep("disassortative", M / 2)
|
||||
)
|
||||
collection
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue