simulations : NA robustness, switching to future backend
This commit is contained in:
parent
4dbdc6b61c
commit
e9c4dc3883
1 changed files with 128 additions and 123 deletions
|
|
@ -9,6 +9,7 @@ if (!all((necessary_packages %in% installed.packages()))) {
|
|||
library("pROC")
|
||||
library("colSBM")
|
||||
|
||||
future::plan(future::multicore())
|
||||
|
||||
# Arguments
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
|
|
@ -73,13 +74,15 @@ collections <- list(
|
|||
"iid" = c(
|
||||
generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
alpha,
|
||||
M = 1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
generate_bipartite_collection(nr2, nc2,
|
||||
pir, pic,
|
||||
alpha, M - 1,
|
||||
alpha,
|
||||
M = M - 1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
|
|
@ -162,142 +165,144 @@ message(
|
|||
"Starting NA robustness simulation with ", sampling,
|
||||
" sampling and ", struct, " structure."
|
||||
)
|
||||
result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
||||
# Looping over conditions
|
||||
prop_NAs <- conditions[current, ][["prop_NAs"]]
|
||||
model <- as.character(conditions[current, ][["model"]])
|
||||
bipartite_collection <- collections[[model]]
|
||||
result_list <- future.apply::future_lapply(
|
||||
seq_len(nrow(conditions)),
|
||||
function(current) {
|
||||
# Looping over conditions
|
||||
prop_NAs <- conditions[current, ][["prop_NAs"]]
|
||||
model <- as.character(conditions[current, ][["model"]])
|
||||
bipartite_collection <- collections[[model]]
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]][["incidence_matrix"]]
|
||||
})
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]][["incidence_matrix"]]
|
||||
})
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]][["row_blockmemberships"]],
|
||||
bipartite_collection[[m]][["col_blockmemberships"]]
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]][["row_blockmemberships"]],
|
||||
bipartite_collection[[m]][["col_blockmemberships"]]
|
||||
)
|
||||
})
|
||||
|
||||
NAs_selected_index <- switch(sampling,
|
||||
"uniform" = {
|
||||
seq_len(length(bipartite_collection_incidence[[1]]))
|
||||
},
|
||||
"row" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_nodes_selected <- seq(1, nc2)
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"col" = {
|
||||
row_nodes_selected <- seq(1, nr2)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"rowcol" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
NAs_selected_index <- switch(sampling,
|
||||
"uniform" = {
|
||||
seq_len(length(bipartite_collection_incidence[[1]]))
|
||||
},
|
||||
"row" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_nodes_selected <- seq(1, nc2)
|
||||
NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index)))
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"col" = {
|
||||
row_nodes_selected <- seq(1, nr2)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
"rowcol" = {
|
||||
row_cluster_selected <- sample.int(n = length(pir), size = 1)
|
||||
row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
|
||||
col_cluster_selected <- sample.int(n = length(pic), size = 1)
|
||||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
}
|
||||
)
|
||||
|
||||
NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index)))
|
||||
|
||||
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
|
||||
bipartite_collection_incidence[[1]][NAs_index] <- NA
|
||||
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
|
||||
arr.ind = TRUE
|
||||
)
|
||||
|
||||
start_time <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence, colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
|
||||
bipartite_collection_incidence[[1]][NAs_index] <- NA
|
||||
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
|
||||
arr.ind = TRUE
|
||||
)
|
||||
)
|
||||
stop_time <- Sys.time()
|
||||
|
||||
baseline_LBM <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence[[1]], colsbm_model = "iid",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
start_time <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence, colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
)
|
||||
)
|
||||
)
|
||||
stop_time <- Sys.time()
|
||||
|
||||
# Predicted links
|
||||
X_hat_LBM <- baseline_LBM[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
baseline_LBM[["best_fit"]][["alpha"]] %*%
|
||||
t(baseline_LBM[["best_fit"]][["tau"]][[1]][[2]])
|
||||
X_hat <- mybisbmpop[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
mybisbmpop[["best_fit"]][["alpha"]] %*%
|
||||
t(mybisbmpop[["best_fit"]][["tau"]][[1]][[2]])
|
||||
baseline_LBM <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence[[1]], colsbm_model = "iid",
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
)
|
||||
)
|
||||
|
||||
# Compute ROC and AUC
|
||||
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
|
||||
auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
|
||||
# Predicted links
|
||||
X_hat_LBM <- baseline_LBM[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
baseline_LBM[["best_fit"]][["alpha"]] %*%
|
||||
t(baseline_LBM[["best_fit"]][["tau"]][[1]][[2]])
|
||||
X_hat <- mybisbmpop[["best_fit"]][["tau"]][[1]][[1]] %*%
|
||||
mybisbmpop[["best_fit"]][["alpha"]] %*%
|
||||
t(mybisbmpop[["best_fit"]][["tau"]][[1]][[2]])
|
||||
|
||||
# Computing ARI on the NAs
|
||||
out_data_frame <- data.frame(
|
||||
prop_NAs = prop_NAs,
|
||||
model = model,
|
||||
repetition = conditions[current, ][["repetition"]],
|
||||
auc_LBM = auc_LBM,
|
||||
auc_colBiSBM = auc_colBiSBM,
|
||||
arirow_LBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
aricol_LBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
arirow_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
aricol_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec"),
|
||||
sampling = sampling,
|
||||
struct = struct
|
||||
)
|
||||
# Compute ROC and AUC
|
||||
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
|
||||
auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
|
||||
|
||||
message("Finished step ", current, "/", nrow(conditions), "\n")
|
||||
# Computing ARI on the NAs
|
||||
out_data_frame <- data.frame(
|
||||
prop_NAs = prop_NAs,
|
||||
model = model,
|
||||
repetition = conditions[current, ][["repetition"]],
|
||||
auc_LBM = auc_LBM,
|
||||
auc_colBiSBM = auc_colBiSBM,
|
||||
arirow_LBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
aricol_LBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
baseline_LBM[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
arirow_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[1]]
|
||||
),
|
||||
aricol_colBiSBM = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec"),
|
||||
sampling = sampling,
|
||||
struct = struct
|
||||
)
|
||||
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", current, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
))
|
||||
message("Finished step ", current, "/", nrow(conditions), "\n")
|
||||
|
||||
saveRDS(out_data_frame, file = temp_file_save)
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", current, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
))
|
||||
|
||||
return(out_data_frame)
|
||||
},
|
||||
mc.cores = parallel::detectCores() - 1
|
||||
saveRDS(out_data_frame, file = temp_file_save)
|
||||
|
||||
return(out_data_frame)
|
||||
},
|
||||
future.seed = NULL
|
||||
)
|
||||
|
||||
result_dataframe <- do.call("rbind", result_list)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue