Adding all
This commit is contained in:
parent
38cefffceb
commit
c085f96a78
10 changed files with 507 additions and 43 deletions
87
code/analysis/analyze_NA_robustness.R
Normal file
87
code/analysis/analyze_NA_robustness.R
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
library(ggplot2)
|
||||
library(tidyverse)
|
||||
library(latex2exp)
|
||||
library(patchwork)
|
||||
|
||||
# Loading data
|
||||
data_folder <- file.path("code", "results", "simulations", "NA_robustness")
|
||||
|
||||
data <- readRDS(file.path(
|
||||
data_folder,
|
||||
"NA_robustness18-04-2024_17-05-44_1-100.Rds"
|
||||
))
|
||||
|
||||
max_repetition <- max(data$repetition)
|
||||
|
||||
# Averaging over repetitions
|
||||
averaged_data <- data %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
select(-repetition) %>%
|
||||
summarise_all(list(mean = mean, sd = sd))
|
||||
|
||||
# Preparing auc_data
|
||||
auc_data <- averaged_data %>%
|
||||
select(c(prop_NAs, model) | contains("auc_")) %>%
|
||||
rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
|
||||
auc_data_long <-
|
||||
bind_cols(
|
||||
auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "auc_mean"
|
||||
),
|
||||
auc_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "auc_sd"
|
||||
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
|
||||
) %>% mutate(method = gsub(
|
||||
pattern = "_mean",
|
||||
replacement = "", fixed = TRUE
|
||||
))
|
||||
# Preparing ARI data
|
||||
ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari"))
|
||||
|
||||
ari_data_long <- bind_cols(
|
||||
ari_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "ari_mean"
|
||||
),
|
||||
ari_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "ari_sd"
|
||||
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
|
||||
) %>%
|
||||
separate(method, into = c("dim", "method", "useless"), sep = "_") %>%
|
||||
select(-useless)
|
||||
|
||||
auc_plot <- ggplot(auc_data_long) +
|
||||
aes(x = prop_NAs, y = auc_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
|
||||
ylim(c(0, 1)) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("\\bar{AUC}")) +
|
||||
xlab("NA proportion") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(cols = vars(model))
|
||||
|
||||
ari_plot <- ggplot(ari_data_long) +
|
||||
aes(x = prop_NAs, y = ari_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("$\\bar{ARI^d}$")) +
|
||||
xlab("NA proportion") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(rows = vars(model), cols = vars(dim))
|
||||
|
||||
auc_plot + ari_plot
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
18
code/scripts/migale_simulations_NA_robustness.sh
Executable file
18
code/scripts/migale_simulations_NA_robustness.sh
Executable file
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env bash
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
|
||||
# Finding simulations directory
|
||||
SIMULATIONS_DIR=$(echo ${SCRIPT_DIR%/*}/simulations)
|
||||
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct nested --sampling uniform"
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct modular --sampling uniform"
|
||||
|
||||
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct nested --sampling row"
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct modular --sampling row"
|
||||
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct nested --sampling col"
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct modular --sampling col"
|
||||
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct nested --sampling rowcol"
|
||||
Rscript "$(echo ${SIMULATIONS_DIR}/simulations_NA_robustness.R)" "--struct modular --sampling rowcol"
|
||||
|
|
@ -1,35 +1,77 @@
|
|||
necessary_packages <- c(
|
||||
"remotes", "tictoc", "pROC", "sbm", "parallel", "colSBM"
|
||||
"remotes", "tictoc", "pROC", "parallel", "colSBM"
|
||||
)
|
||||
|
||||
if (!all((necessary_packages %in% installed.packages()))) {
|
||||
install.packages(necessary_packages[-length(necessary_packages)])
|
||||
remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2")
|
||||
install.packages(necessary_packages[-length(necessary_packages)])
|
||||
remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2")
|
||||
}
|
||||
|
||||
|
||||
library("sbm")
|
||||
library("pROC")
|
||||
library("colSBM")
|
||||
|
||||
|
||||
# Arguments
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
sampling <- "uniform"
|
||||
struct <- "modular"
|
||||
|
||||
if (length(arg) == 0L) {
|
||||
message("No arguments provided, using default.")
|
||||
} else {
|
||||
if ("--struct" %in% arg) {
|
||||
struct <- arg[(which(arg) == "--struct") + 1L]
|
||||
} else {
|
||||
message("No structure provided, defaulting to modular.")
|
||||
}
|
||||
|
||||
if ("--sampling" %in% arg) {
|
||||
sampling <- arg[(which(arg) == "--sampling") + 1L]
|
||||
} else {
|
||||
message("No sampling provided, defaulting to uniform.")
|
||||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
allowed_sampling <- c("uniform", "row", "col", "rowcol")
|
||||
allowed_struct <- c("modular", "nested")
|
||||
|
||||
stopifnot(
|
||||
"Unknown sampling, should be : uniform, row, rowcol" = (sampling %in%
|
||||
allowed_sampling),
|
||||
"Unknown structure, should be : modular or nested" = (struct %in% allowed_struct)
|
||||
)
|
||||
|
||||
set.seed(1234)
|
||||
|
||||
eps <- 0.05
|
||||
|
||||
M <- 3
|
||||
M <- 4
|
||||
|
||||
# Defining parameters
|
||||
nr <- 100
|
||||
nc <- 150
|
||||
pir <- c(0.5, 0.3, 0.2)
|
||||
pic <- c(0.5, 0.3, 0.2)
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
|
||||
max_repetition <- 10
|
||||
alpha <- switch(struct,
|
||||
"modular" = {
|
||||
alpha <- matrix(c(
|
||||
0.7, eps, eps,
|
||||
eps, 0.4, eps,
|
||||
eps, eps, 0.6
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
},
|
||||
"nested" = {
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
}
|
||||
)
|
||||
|
||||
max_repetition <- 5
|
||||
|
||||
# Collections
|
||||
collections <- list(
|
||||
|
|
@ -75,47 +117,89 @@ if (!dir.exists(main_dir)) {
|
|||
}
|
||||
|
||||
start_time <- format(Sys.time(), "%d-%m-%Y_%H-%M-%S")
|
||||
temp_dir <- file.path(main_dir, paste0("tmp", start_time))
|
||||
temp_dir <- file.path(main_dir, paste0(
|
||||
"tmp", start_time, "_",
|
||||
sampling, "_", struct
|
||||
))
|
||||
|
||||
if (!dir.exists(temp_dir)) {
|
||||
dir.create(temp_dir, recursive = TRUE)
|
||||
}
|
||||
|
||||
file_save <- file.path(main_dir, paste0(
|
||||
"NA_robustness",
|
||||
"NA_robustness_", sampling, "_", struct, "_",
|
||||
start_time, "_1-", nrow(conditions), ".Rds"
|
||||
))
|
||||
|
||||
message(
|
||||
"Starting NA robustness simulation with ", sampling,
|
||||
" sampling and ", struct, " structure."
|
||||
)
|
||||
result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
||||
# Looping over conditions
|
||||
prop_NAs <- conditions[current, ]$prop_NAs
|
||||
model <- as.character(conditions[current, ]$model)
|
||||
prop_NAs <- conditions[current, ][["prop_NAs"]]
|
||||
model <- as.character(conditions[current, ][["model"]])
|
||||
bipartite_collection <- collections[[model]]
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]]$incidence_matrix
|
||||
bipartite_collection[[m]][["incidence_matrix"]]
|
||||
})
|
||||
|
||||
# Sampling values to replace by NAs
|
||||
NAs_index <- sample(
|
||||
seq_len(length(bipartite_collection_incidence[[1]])),
|
||||
floor(prop_NAs * length(bipartite_collection_incidence[[1]]))
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]][["row_blockmemberships"]],
|
||||
bipartite_collection[[m]][["col_blockmemberships"]]
|
||||
)
|
||||
})
|
||||
|
||||
NAs_selected_index <- switch(sampling,
|
||||
"uniform" = {
|
||||
seq_len(length(bipartite_collection_incidence[[1]]))
|
||||
},
|
||||
"row" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_nodes_selected <- seq(1, nc)
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"col" = {
|
||||
row_nodes_selected <- seq(1, nr)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"rowcol" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
}
|
||||
)
|
||||
|
||||
NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index)))
|
||||
|
||||
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
|
||||
bipartite_collection_incidence[[1]][NAs_index] <- NA
|
||||
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
|
||||
arr.ind = TRUE
|
||||
)
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]]$row_blockmemberships,
|
||||
bipartite_collection[[m]]$col_blockmemberships
|
||||
)
|
||||
})
|
||||
|
||||
start_time <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence, colsbm_model = model,
|
||||
|
|
@ -135,8 +219,12 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
)
|
||||
|
||||
# Predicted links
|
||||
X_hat_LBM <- baseline_LBM$best_fit$tau[[1]][[1]] %*% baseline_LBM$best_fit$alpha %*% t(baseline_LBM$best_fit$tau[[1]][[2]])
|
||||
X_hat <- mybisbmpop$best_fit$tau[[1]][[1]] %*% mybisbmpop$best_fit$alpha %*% t(mybisbmpop$best_fit$tau[[1]][[2]])
|
||||
X_hat_LBM <- baseline_LBM[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
baseline_LBM[["best_fit"]][["alpha"]] %*%
|
||||
t(baseline_LBM[["best_fit"]][["tau"]][[1]][[2]])
|
||||
X_hat <- mybisbmpop[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
mybisbmpop[["best_fit"]][["alpha"]] %*%
|
||||
t(mybisbmpop[["best_fit"]][["tau"]][[1]][[2]])
|
||||
|
||||
# Compute ROC and AUC
|
||||
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
|
||||
|
|
@ -146,24 +234,24 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
out_data_frame <- data.frame(
|
||||
prop_NAs = prop_NAs,
|
||||
model = model,
|
||||
repetition = conditions[current, ]$repetition,
|
||||
repetition = conditions[current, ][["repetition"]],
|
||||
auc_LBM = auc_LBM,
|
||||
auc_colBiSBM = auc_colBiSBM,
|
||||
LBM_ari_row = aricode::ARI(
|
||||
arirow_LBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
baseline_LBM$best_fit$Z[[1]][[1]]
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
LBM_ari_col = aricode::ARI(
|
||||
aricol_LBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
baseline_LBM$best_fit$Z[[1]][[2]]
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
NA_net_ari_row = aricode::ARI(
|
||||
arirow_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
mybisbmpop$best_fit$Z[[1]][[1]]
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
NA_net_ari_col = aricode::ARI(
|
||||
aricol_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
mybisbmpop$best_fit$Z[[1]][[2]]
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec")
|
||||
)
|
||||
|
|
@ -190,3 +278,4 @@ saveRDS(
|
|||
result_dataframe,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
|
|
@ -90,6 +90,7 @@ file_save <- file.path(main_dir, paste0("bernoulli_inference_", start_time, "_",
|
|||
|
||||
tictoc::tic()
|
||||
|
||||
message("Starting bernoulli inference simulation.")
|
||||
conditions_rows <- seq_len(nrow(conditions))
|
||||
results <- parallel::mclapply(conditions_rows, function(s) {
|
||||
start_time_condition <- Sys.time()
|
||||
|
|
@ -311,7 +312,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
elapsed_secs = difftime(stop_time_condition,
|
||||
start_time_condition, units = "sec")
|
||||
)
|
||||
message("Finished step ", s, "/", nrow(conditions), "\n")
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
|
|
@ -333,3 +334,4 @@ full_data_frame <- do.call(rbind, results)
|
|||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
|
|
@ -89,7 +89,7 @@ if (!dir.exists(temp_dir)) {
|
|||
file_save <- file.path(main_dir, paste0("poisson_inference_", start_time, "_", arg[1], "-", arg[2], ".Rds"))
|
||||
|
||||
tictoc::tic()
|
||||
|
||||
message("Starting poisson inference simulation.")
|
||||
conditions_rows <- seq_len(nrow(conditions))
|
||||
results <- parallel::mclapply(conditions_rows, function(s) {
|
||||
start_time_condition <- Sys.time()
|
||||
|
|
@ -311,7 +311,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
elapsed_secs = difftime(stop_time_condition,
|
||||
start_time_condition, units = "sec")
|
||||
)
|
||||
message("Finished step ", s, "/", nrow(conditions), "\n")
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
|
|
@ -333,3 +333,4 @@ full_data_frame <- do.call(rbind, results)
|
|||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
267
code/simulations/simulations_model_selection.R
Normal file
267
code/simulations/simulations_model_selection.R
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
suppressPackageStartupMessages(library("colSBM"))
|
||||
suppressPackageStartupMessages(library(progressr))
|
||||
suppressPackageStartupMessages(library(future))
|
||||
suppressPackageStartupMessages(library(future.apply))
|
||||
|
||||
handlers(global = TRUE)
|
||||
|
||||
plan("multicore", workers = parallelly::availableCores() - 1)
|
||||
|
||||
# Network param
|
||||
nr <- 90
|
||||
nc <- 90
|
||||
|
||||
# Changing parameters
|
||||
epsilons_pi <- seq(from = 0.0, to = 0.28, by = 0.035)
|
||||
epsilons_rho <- seq(from = 0.0, to = 0.28, by = 0.035)
|
||||
pi1 <- matrix(rep(1 / 3, 3), nrow = 1)
|
||||
rho1 <- matrix(rep(1 / 3, 3), nrow = 1)
|
||||
ea <- 0.16
|
||||
alpha <- 0.25 + matrix(
|
||||
c(
|
||||
3 * ea, 2 * ea, ea,
|
||||
2 * ea, 2 * ea, -ea,
|
||||
ea, -ea, ea
|
||||
),
|
||||
byrow = TRUE, nrow = 3, ncol = 3
|
||||
)
|
||||
|
||||
|
||||
prob_order <- seq(1, 3)
|
||||
prob_order <- t(sapply(combinat::permn(prob_order), function(v) v))
|
||||
|
||||
|
||||
repetitions <- seq.int(3)
|
||||
|
||||
conditions <- tidyr::crossing(
|
||||
epsilon_pi = epsilons_pi,
|
||||
epsilon_rho = epsilons_rho,
|
||||
pi2_order = prob_order,
|
||||
rho2_order = prob_order,
|
||||
repetition = repetitions
|
||||
)
|
||||
|
||||
# To speed up computations and debug adding an argument based selection
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
if (identical(arg, character(0))) {
|
||||
cat(
|
||||
"\nNo arguments provided,",
|
||||
"assuming you want to go over all the conditions."
|
||||
)
|
||||
arg <- c(1, nrow(conditions))
|
||||
batch_name <- "default"
|
||||
} else {
|
||||
if (!is.na(arg[3])) {
|
||||
batch_name <- arg[3]
|
||||
} else {
|
||||
batch_name <- "default"
|
||||
}
|
||||
arg <- as.numeric(arg[-3])
|
||||
}
|
||||
if (arg[1] < 1 | arg[1] > nrow(conditions)) {
|
||||
warning(paste("Arg 1 was invalid, set to 1."))
|
||||
arg[1] <- 1
|
||||
}
|
||||
if (arg[2] > nrow(conditions) | arg[2] < 1) {
|
||||
warning(paste("Arg 2 was invalid, set to", nrow(conditions)))
|
||||
arg[2] <- nrow(conditions)
|
||||
}
|
||||
|
||||
choosed_conditions <- seq.int(from = arg[1], to = arg[2])
|
||||
conditions <- conditions[choosed_conditions, ]
|
||||
|
||||
# Data params
|
||||
main_dir <- file.path("code", "results", "simulations", "model_selection")
|
||||
|
||||
if (!dir.exists(main_dir)) {
|
||||
dir.create(main_dir, recursive = TRUE)
|
||||
}
|
||||
|
||||
start_time <- format(Sys.time(), "%d-%m-%Y_%H-%M-%S")
|
||||
temp_dir <- file.path(main_dir, paste0("tmp", start_time))
|
||||
|
||||
if (!dir.exists(temp_dir)) {
|
||||
dir.create(temp_dir, recursive = TRUE)
|
||||
}
|
||||
|
||||
file_save <- file.path(main_dir, paste0(
|
||||
"model_selection", start_time, "_",
|
||||
batch_name, ".Rds"
|
||||
))
|
||||
|
||||
message("Starting model selection simulation.")
|
||||
tictoc::tic()
|
||||
with_progress({
|
||||
row_conditions <- seq_len(nrow(conditions))
|
||||
p <- progressor(along = row_conditions)
|
||||
results <- future_lapply(row_conditions, function(s) {
|
||||
start_time_condition <- Sys.time()
|
||||
epsilon_pi <- conditions[s, ]$epsilon_pi
|
||||
epsilon_rho <- conditions[s, ]$epsilon_rho
|
||||
|
||||
# Computing the vector with the epsilons
|
||||
current_pi2 <- c(
|
||||
1 / 3 - epsilon_pi,
|
||||
1 / 3,
|
||||
1 / 3 + epsilon_pi
|
||||
)
|
||||
current_rho2 <- c(
|
||||
1 / 3 - epsilon_rho,
|
||||
1 / 3,
|
||||
1 / 3 + epsilon_rho
|
||||
)
|
||||
|
||||
# Permutating the vectors
|
||||
current_pi2 <- current_pi2[conditions[s, ]$pi2_order]
|
||||
current_rho2 <- current_rho2[conditions[s, ]$rho2_order]
|
||||
|
||||
netlist_generated <- list(
|
||||
generate_bipartite_collection(
|
||||
nr, nc, pi1, rho1,
|
||||
alpha,
|
||||
M = 1, return_memberships = TRUE
|
||||
)[[1]],
|
||||
generate_bipartite_collection(
|
||||
nr, nc, current_pi2, current_rho2,
|
||||
alpha,
|
||||
M = 1, return_memberships = TRUE
|
||||
)[[1]]
|
||||
)
|
||||
|
||||
# Extracting the incidence matrices
|
||||
netlist <- lapply(seq_along(netlist_generated), function(m) {
|
||||
return(netlist_generated[[m]]$incidence_matrix)
|
||||
})
|
||||
|
||||
pin <- progressor(steps = 4)
|
||||
|
||||
fitted_bisbmpop_iid <- estimate_colBiSBM(
|
||||
netlist = netlist,
|
||||
colsbm_model = "iid",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
verbosity = 0,
|
||||
plot_details = 0,
|
||||
nb_cores = parallel::detectCores() - 1
|
||||
)
|
||||
)
|
||||
pin("iid computed")
|
||||
|
||||
fitted_bisbmpop_iid$sep_BiSBM$M <- 2
|
||||
sep_BiSBM <- fitted_bisbmpop_iid$sep_BiSBM
|
||||
|
||||
fitted_bisbmpop_pi <- estimate_colBiSBM(
|
||||
netlist = netlist,
|
||||
colsbm_model = "pi",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
verbosity = 0,
|
||||
plot_details = 0,
|
||||
nb_cores = parallel::detectCores() - 1
|
||||
),
|
||||
sep_BiSBM = sep_BiSBM
|
||||
)
|
||||
pin("pi computed")
|
||||
|
||||
fitted_bisbmpop_rho <- estimate_colBiSBM(
|
||||
netlist = netlist,
|
||||
colsbm_model = "rho",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
verbosity = 0,
|
||||
plot_details = 0,
|
||||
nb_cores = parallel::detectCores() - 1
|
||||
),
|
||||
sep_BiSBM = sep_BiSBM
|
||||
)
|
||||
pin("rho computed")
|
||||
|
||||
|
||||
fitted_bisbmpop_pirho <- estimate_colBiSBM(
|
||||
netlist = netlist,
|
||||
colsbm_model = "pirho",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
verbosity = 0,
|
||||
plot_details = 0,
|
||||
nb_cores = parallel::detectCores() - 1
|
||||
),
|
||||
sep_BiSBM = sep_BiSBM
|
||||
)
|
||||
pin("pirho computed")
|
||||
|
||||
stop_time_condition <- Sys.time()
|
||||
|
||||
# BICLs
|
||||
sep_BICL <- sum(fitted_bisbmpop_iid$sep_BiSBM$BICL)
|
||||
iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
|
||||
pi_BICL <- fitted_bisbmpop_pi$best_fit$BICL
|
||||
rho_BICL <- fitted_bisbmpop_rho$best_fit$BICL
|
||||
pirho_BICL <- fitted_bisbmpop_pirho$best_fit$BICL
|
||||
BICLs <- c(sep_BICL, iid_BICL, pi_BICL, rho_BICL, pirho_BICL)
|
||||
|
||||
data_frame_output <- data.frame(
|
||||
# The conditions
|
||||
epsilon_pi = epsilon_pi,
|
||||
epsilon_rho = epsilon_rho,
|
||||
pi2 = matrix(current_pi2, nrow = 1),
|
||||
rho2 = matrix(current_rho2, nrow = 1),
|
||||
repetition = as.numeric(conditions[s, ]$repetition),
|
||||
|
||||
# The results
|
||||
## sep
|
||||
sep_BICL = sep_BICL,
|
||||
|
||||
## iid
|
||||
iid_BICL = iid_BICL,
|
||||
iid_Q1 = fitted_bisbmpop_iid$best_fit$Q[1],
|
||||
iid_Q2 = fitted_bisbmpop_iid$best_fit$Q[2],
|
||||
|
||||
## pi
|
||||
pi_BICL = pi_BICL,
|
||||
pi_Q1 = fitted_bisbmpop_pi$best_fit$Q[1],
|
||||
pi_Q2 = fitted_bisbmpop_pi$best_fit$Q[2],
|
||||
|
||||
## pi
|
||||
rho_BICL = rho_BICL,
|
||||
rho_Q1 = fitted_bisbmpop_rho$best_fit$Q[1],
|
||||
rho_Q2 = fitted_bisbmpop_rho$best_fit$Q[2],
|
||||
|
||||
## pirho
|
||||
pirho_BICL = pirho_BICL,
|
||||
pirho_Q1 = fitted_bisbmpop_pirho$best_fit$Q[1],
|
||||
pirho_Q2 = fitted_bisbmpop_pirho$best_fit$Q[2],
|
||||
|
||||
# Preferred model
|
||||
preferred_model = c(
|
||||
"sep", "iid",
|
||||
"pi", "rho", "pirho"
|
||||
)[which.max(BICLs)],
|
||||
elapsed_secs = difftime(stop_time_condition,
|
||||
start_time_condition,
|
||||
units = "sec"
|
||||
)
|
||||
)
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", s, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
))
|
||||
|
||||
saveRDS(object = data_frame_output, file = temp_file_save)
|
||||
p(sprintf("Finished condition %i/%i", s, nrow(conditions)))
|
||||
|
||||
return(data_frame_output)
|
||||
})
|
||||
})
|
||||
tictoc::toc()
|
||||
full_data_frame <- do.call(rbind, results)
|
||||
|
||||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
Loading…
Add table
Reference in a new issue