# Benchmark script: simulate bipartite Bernoulli SBM networks with a fixed
# number of rows and an increasing number of columns, fit each of them with
# estimateBipartiteSBM() in parallel, and save fitting times and ARI scores
# (simulated vs. fitted memberships) to an .Rds file.
#
# Command-line arguments (trailing only):
#   (none)        -> default min (50) and default max (5000) number of columns
#   <max>         -> default min, provided max
#   <min> <max>   -> provided min and max
# Any value that is not a strictly positive integer falls back to its default.

# All dependencies are attached up front so that a missing package fails the
# run before the simulation and the fits are started.
library(sbm)
library(here)
library(parallelly)
library(future)
library(future.apply)
library(future.callr)
library(aricode)

# Arguments ----

# Parse one command-line value as a strictly positive integer.
# Returns NA_integer_ when the value is absent, NA, not a number, or <= 0.
parse_positive_int <- function(arg) {
  if (length(arg) != 1L || is.na(arg)) {
    return(NA_integer_)
  }
  # Invalid input is reported by the caller, so the raw coercion warning
  # ("NAs introduced by coercion") is silenced here.
  value <- suppressWarnings(as.integer(arg))
  if (is.na(value) || value <= 0L) {
    return(NA_integer_)
  }
  value
}

args <- commandArgs(trailingOnly = TRUE)
if (length(args) > 2L) {
  stop("Too many arguments provided")
}
if (length(args) == 2L) {
  print("Two arguments were provided, will be treated as min and max")
  min_arg <- args[1]
  max_arg <- args[2]
} else if (length(args) == 1L) {
  print("One argument was provided, will be treated as max")
  min_arg <- NA
  max_arg <- args
} else {
  print("No argument was provided, default min and max will be used")
  min_arg <- NA
  max_arg <- NA
}

max_nb_col <- parse_positive_int(max_arg)
if (is.na(max_nb_col)) {
  max_nb_col <- 5000L
  print(paste0(
    "No or incorrect argument was passed setting max to default value : ",
    max_nb_col
  ))
} else {
  print(paste0("Setting to max provided value : ", max_nb_col))
}

min_nb_col <- parse_positive_int(min_arg)
if (is.na(min_nb_col)) {
  min_nb_col <- 50L
  print(paste0(
    "No or incorrect argument was passed setting min to default value : ",
    min_nb_col
  ))
} else {
  print(paste0("Setting to min provided value : ", min_nb_col))
}

if (max_nb_col <= min_nb_col) {
  stop("The range between min and max should be positive and larger than 0")
}

# Simulation ----

model <- "bernoulli"
set.seed(1234)
nb_row <- 50

# Block proportions: two groups for the first dimension, three for the second.
blockProp <- list(
  c(0.25, 0.75),
  c(0.1, 0.4, 0.5)
)
# 2 x 3 matrix of connection parameters, filled column by column.
connectParam <- list(mean = matrix(
  c(
    0.9, 0.5,
    0.1, 0.3,
    0.2, 0.05
  ),
  nrow = 2L, ncol = 3L
))

# One simulated network per column count, from min to (at most) max by 50.
nb_col_seq <- seq(min_nb_col, max_nb_col, by = 50)
lbm_list <- lapply(nb_col_seq, function(nb_col) {
  sampleBipartiteSBM(
    nbNodes = c(nb_row, nb_col),
    blockProp = blockProp,
    connectParam = connectParam,
    model = model
  )$rNetwork()
})

# Convert an indicator (one-hot) membership matrix into, for each row, the
# index of the column(s) equal to 1.
unonehot <- function(mat) {
  apply(mat, 1, FUN = function(row) which(row == 1))
}

lbm_matrices <- lapply(lbm_list, function(lbm) lbm$networkData)
lbm_row_memberships <- lapply(
  lbm_list,
  function(lbm) unonehot(lbm$indMemberships$row)
)
lbm_col_memberships <- lapply(
  lbm_list,
  function(lbm) unonehot(lbm$indMemberships$col)
)

# Output location ----

results_dir <- here("results", "increasing_size")
# results_dir is already an absolute path built by here(), so the file name is
# appended with file.path() instead of going through here() a second time.
save_path <- file.path(
  results_dir,
  paste0(
    "sbm_incr_", model, "_from_", min_nb_col, "_to_", max_nb_col, ".Rds"
  )
)
if (!dir.exists(results_dir)) {
  dir.create(results_dir, recursive = TRUE)
}
print(paste0("Final results will be saved to ", save_path))

# Disabled: per-run temporary directory.
# (epoch <- as.integer(Sys.time()))
# temp_dir <- here(results_dir, paste0(min_nb_col, "_to_", max_nb_col, "tmp", epoch))
# print(paste0("Temp saved to ", temp_dir))

# Parallel estimation ----

plan(tweak("callr", workers = 64))

# Fit every simulated network and record the wall-clock time of each fit.
lbm_res <- future_lapply(seq_along(lbm_matrices), function(mat_idx) {
  start_time <- Sys.time()
  fit <- estimateBipartiteSBM(
    netMat = lbm_matrices[[mat_idx]],
    estimOptions = list(plot = 0)
  )
  stop_time <- Sys.time()
  list(fit = fit, time = difftime(stop_time, start_time, units = "secs"))
}, future.seed = TRUE)

lbm_fits <- lapply(lbm_res, function(res) res$fit)
# Elapsed seconds as a plain numeric vector (the difftime class is dropped).
lbm_times <- vapply(lbm_res, function(res) as.numeric(res$time), numeric(1))

lbm_fit_row <- lapply(lbm_fits, function(fit) unonehot(fit$indMemberships$row))
lbm_fit_col <- lapply(lbm_fits, function(fit) unonehot(fit$indMemberships$col))

# Clustering quality ----

# ARI between simulated and fitted memberships, one value per network.
ari_row <- vapply(seq_along(lbm_matrices), function(idx) {
  ARI(lbm_row_memberships[[idx]], lbm_fit_row[[idx]])
}, numeric(1))
ari_col <- vapply(seq_along(lbm_matrices), function(idx) {
  ARI(lbm_col_memberships[[idx]], lbm_fit_col[[idx]])
}, numeric(1))

# Save ----

out_df <- data.frame(
  n1 = nb_row,
  n2 = nb_col_seq,
  model = model,
  time = lbm_times,
  ari_row = ari_row,
  ari_col = ari_col
)
saveRDS(out_df, save_path)