simulations : NA robustness, switching to future backend

2024-07-18 09:52:58 +02:00 · 2024-07-18 09:52:58 +02:00 · e9c4dc3883
commit e9c4dc3883
parent 4dbdc6b61c
1 changed files with 128 additions and 123 deletions
--- a/code/simulations/simulations_NA_robustness.R
+++ b/code/simulations/simulations_NA_robustness.R
@ -9,6 +9,7 @@ if (!all((necessary_packages %in% installed.packages()))) {
 library("pROC")
 library("colSBM")

+future::plan(future::multicore())

 # Arguments
 arg <- commandArgs(trailingOnly = TRUE)
@ -73,13 +74,15 @@ collections <- list(
    "iid" = c(
        generate_bipartite_collection(nr1, nc1,
            pir, pic,
-            alpha, 1,
+            alpha,
+            M = 1,
            model = "iid",
            return_memberships = TRUE
        ),
        generate_bipartite_collection(nr2, nc2,
            pir, pic,
-            alpha, M - 1,
+            alpha,
+            M = M - 1,
            model = "iid",
            return_memberships = TRUE
        )
@ -162,142 +165,144 @@ message(
    "Starting NA robustness simulation with ", sampling,
    " sampling and ", struct, " structure."
 )
-result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
-    # Looping over conditions
-    prop_NAs <- conditions[current, ][["prop_NAs"]]
-    model <- as.character(conditions[current, ][["model"]])
-    bipartite_collection <- collections[[model]]
+result_list <- future.apply::future_lapply(
+    seq_len(nrow(conditions)),
+    function(current) {
+        # Looping over conditions
+        prop_NAs <- conditions[current, ][["prop_NAs"]]
+        model <- as.character(conditions[current, ][["model"]])
+        bipartite_collection <- collections[[model]]

-    # This is a list of the M incidence matrices
-    bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
-        bipartite_collection[[m]][["incidence_matrix"]]
-    })
+        # This is a list of the M incidence matrices
+        bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
+            bipartite_collection[[m]][["incidence_matrix"]]
+        })

-    Z <- lapply(seq.int(M), function(m) {
-        list(
-            bipartite_collection[[m]][["row_blockmemberships"]],
-            bipartite_collection[[m]][["col_blockmemberships"]]
+        Z <- lapply(seq.int(M), function(m) {
+            list(
+                bipartite_collection[[m]][["row_blockmemberships"]],
+                bipartite_collection[[m]][["col_blockmemberships"]]
+            )
+        })
+
+        NAs_selected_index <- switch(sampling,
+            "uniform" = {
+                seq_len(length(bipartite_collection_incidence[[1]]))
+            },
+            "row" = {
+                row_cluster_selected <- sample.int(n = length(pir), size = 1)
+                row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
+                col_nodes_selected <- seq(1, nc2)
+
+                NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
+
+                #  Computes the index as a single number, R stores matrices by column
+                (NAs_selected_index_exp[["col"]] - 1) *
+                    nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
+            },
+            "col" = {
+                row_nodes_selected <- seq(1, nr2)
+                col_cluster_selected <- sample.int(n = length(pic), size = 1)
+                col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
+
+                NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
+
+                #  Computes the index as a single number, R stores matrices by column
+                (NAs_selected_index_exp[["col"]] - 1) *
+                    nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
+            },
+            "rowcol" = {
+                row_cluster_selected <- sample.int(n = length(pir), size = 1)
+                row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
+                col_cluster_selected <- sample.int(n = length(pic), size = 1)
+                col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
+                NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
+
+                #  Computes the index as a single number, R stores matrices by column
+                (NAs_selected_index_exp[["col"]] - 1) *
+                    nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
+            }
        )
-    })

-    NAs_selected_index <- switch(sampling,
-        "uniform" = {
-            seq_len(length(bipartite_collection_incidence[[1]]))
-        },
-        "row" = {
-            row_cluster_selected <- sample.int(n = length(pir), size = 1)
-            row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
-            col_nodes_selected <- seq(1, nc2)
+        NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index)))

-            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
-
-            #  Computes the index as a single number, R stores matrices by column
-            (NAs_selected_index_exp[["col"]] - 1) *
-                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
-        },
-        "col" = {
-            row_nodes_selected <- seq(1, nr2)
-            col_cluster_selected <- sample.int(n = length(pic), size = 1)
-            col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
-
-            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
-
-            #  Computes the index as a single number, R stores matrices by column
-            (NAs_selected_index_exp[["col"]] - 1) *
-                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
-        },
-        "rowcol" = {
-            row_cluster_selected <- sample.int(n = length(pir), size = 1)
-            row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected)
-            col_cluster_selected <- sample.int(n = length(pic), size = 1)
-            col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
-            NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
-
-            #  Computes the index as a single number, R stores matrices by column
-            (NAs_selected_index_exp[["col"]] - 1) *
-                nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
-        }
-    )
-
-    NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index)))
-
-    real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
-    bipartite_collection_incidence[[1]][NAs_index] <- NA
-    NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
-        arr.ind = TRUE
-    )
-
-    start_time <- Sys.time()
-    mybisbmpop <- estimate_colBiSBM(
-        netlist = bipartite_collection_incidence, colsbm_model = model,
-        nb_run = 1,
-        global_opts = list(
-            nb_cores = parallel::detectCores() - 1, verbosity = 0
+        real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
+        bipartite_collection_incidence[[1]][NAs_index] <- NA
+        NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
+            arr.ind = TRUE
        )
-    )
-    stop_time <- Sys.time()

-    baseline_LBM <- estimate_colBiSBM(
-        netlist = bipartite_collection_incidence[[1]], colsbm_model = "iid",
-        nb_run = 1,
-        global_opts = list(
-            nb_cores = parallel::detectCores() - 1, verbosity = 0
+        start_time <- Sys.time()
+        mybisbmpop <- estimate_colBiSBM(
+            netlist = bipartite_collection_incidence, colsbm_model = model,
+            nb_run = 1,
+            global_opts = list(
+                nb_cores = parallel::detectCores() - 1, verbosity = 0
+            )
        )
-    )
+        stop_time <- Sys.time()

-    # Predicted links
-    X_hat_LBM <- baseline_LBM[["best_fit"]][["tau"]][[1]][[1]] %*%
-        baseline_LBM[["best_fit"]][["alpha"]] %*%
-        t(baseline_LBM[["best_fit"]][["tau"]][[1]][[2]])
-    X_hat <- mybisbmpop[["best_fit"]][["tau"]][[1]][[1]] %*%
-        mybisbmpop[["best_fit"]][["alpha"]] %*%
-        t(mybisbmpop[["best_fit"]][["tau"]][[1]][[2]])
+        baseline_LBM <- estimate_colBiSBM(
+            netlist = bipartite_collection_incidence[[1]], colsbm_model = "iid",
+            nb_run = 1,
+            global_opts = list(
+                nb_cores = parallel::detectCores() - 1, verbosity = 0
+            )
+        )

-    # Compute ROC and AUC
-    auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
-    auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
+        # Predicted links
+        X_hat_LBM <- baseline_LBM[["best_fit"]][["tau"]][[1]][[1]] %*%
+            baseline_LBM[["best_fit"]][["alpha"]] %*%
+            t(baseline_LBM[["best_fit"]][["tau"]][[1]][[2]])
+        X_hat <- mybisbmpop[["best_fit"]][["tau"]][[1]][[1]] %*%
+            mybisbmpop[["best_fit"]][["alpha"]] %*%
+            t(mybisbmpop[["best_fit"]][["tau"]][[1]][[2]])

-    # Computing ARI on the NAs
-    out_data_frame <- data.frame(
-        prop_NAs = prop_NAs,
-        model = model,
-        repetition = conditions[current, ][["repetition"]],
-        auc_LBM = auc_LBM,
-        auc_colBiSBM = auc_colBiSBM,
-        arirow_LBM = aricode::ARI(
-            Z[[1]][[1]],
-            baseline_LBM[["best_fit"]][["Z"]][[1]][[1]]
-        ),
-        aricol_LBM = aricode::ARI(
-            Z[[1]][[2]],
-            baseline_LBM[["best_fit"]][["Z"]][[1]][[2]]
-        ),
-        arirow_colBiSBM = aricode::ARI(
-            Z[[1]][[1]],
-            mybisbmpop[["best_fit"]][["Z"]][[1]][[1]]
-        ),
-        aricol_colBiSBM = aricode::ARI(
-            Z[[1]][[2]],
-            mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
-        ),
-        elapsed_secs = difftime(stop_time, start_time, units = "sec"),
-        sampling = sampling,
-        struct = struct
-    )
+        # Compute ROC and AUC
+        auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
+        auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))

-    message("Finished step ", current, "/", nrow(conditions), "\n")
+        # Computing ARI on the NAs
+        out_data_frame <- data.frame(
+            prop_NAs = prop_NAs,
+            model = model,
+            repetition = conditions[current, ][["repetition"]],
+            auc_LBM = auc_LBM,
+            auc_colBiSBM = auc_colBiSBM,
+            arirow_LBM = aricode::ARI(
+                Z[[1]][[1]],
+                baseline_LBM[["best_fit"]][["Z"]][[1]][[1]]
+            ),
+            aricol_LBM = aricode::ARI(
+                Z[[1]][[2]],
+                baseline_LBM[["best_fit"]][["Z"]][[1]][[2]]
+            ),
+            arirow_colBiSBM = aricode::ARI(
+                Z[[1]][[1]],
+                mybisbmpop[["best_fit"]][["Z"]][[1]][[1]]
+            ),
+            aricol_colBiSBM = aricode::ARI(
+                Z[[1]][[2]],
+                mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
+            ),
+            elapsed_secs = difftime(stop_time, start_time, units = "sec"),
+            sampling = sampling,
+            struct = struct
+        )

-    #  Saving temp
-    temp_file_save <- file.path(temp_dir, paste0(
-        "conditions_", current, "_on_",
-        nrow(conditions), ".Rds"
-    ))
+        message("Finished step ", current, "/", nrow(conditions), "\n")

-    saveRDS(out_data_frame, file = temp_file_save)
+        #  Saving temp
+        temp_file_save <- file.path(temp_dir, paste0(
+            "conditions_", current, "_on_",
+            nrow(conditions), ".Rds"
+        ))

-    return(out_data_frame)
-},
-mc.cores = parallel::detectCores() - 1
+        saveRDS(out_data_frame, file = temp_file_save)
+
+        return(out_data_frame)
+    },
+    future.seed = NULL
 )

 result_dataframe <- do.call("rbind", result_list)