Handling arguments and saving

This commit is contained in:
Louis 2025-12-22 15:17:37 +01:00
parent 0d45da0316
commit 5e8644493e

View file

@ -1,14 +1,41 @@
library(sbm) library(sbm)
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
if (identical(args, character(0)) || is.na(as.integer(args))) { if (length(args) <= 1) {
max_nb_col <- 5000L max_arg <- args
print(paste0("No or incorrect argument was passed setting to default value : ", max_nb_col)) print("One argument was provided, will be treated as max")
min_arg <- NA
} else { } else {
max_nb_col <- as.integer(args) if (length(args) > 2) {
print(paste0("Setting to provided value : ", max_nb_col)) stop("Too many arguments provided")
}
print("Two arguments were provided, will be treated as min and max")
min_arg <- args[1]
max_arg <- args[2]
} }
if (identical(max_arg, character(0)) || is.na(as.integer(max_arg))) {
max_nb_col <- 5000L
print(paste0("No or incorrect argument was passed setting max to default value : ", max_nb_col))
} else {
max_nb_col <- as.integer(max_arg)
print(paste0("Setting to max provided value : ", max_nb_col))
}
if (identical(min_arg, character(0)) || is.na(as.integer(min_arg))) {
min_nb_col <- 50L
print(paste0("No or incorrect argument was passed setting min to default value : ", min_nb_col))
} else {
min_nb_col <- as.integer(min_arg)
print(paste0("Setting to min provided value : ", min_nb_col))
}
if (max_nb_col - min_nb_col <= 0) {
stop("The range between min and max should be positive and larger than 0")
}
model <- "bernoulli"
set.seed(1234) set.seed(1234)
nb_row <- 50 nb_row <- 50
blockProp <- list( blockProp <- list(
@ -21,12 +48,12 @@ connectParam <- list(mean = matrix(c(
0.3, 0.2, 0.05 0.3, 0.2, 0.05
), nrow = 2L, ncol = 3L)) ), nrow = 2L, ncol = 3L))
nb_col_seq <- seq(50, max_nb_col, by = 50) nb_col_seq <- seq(min_nb_col, max_nb_col, by = 50)
lbm_list <- lapply(nb_col_seq, function(nb_col) { lbm_list <- lapply(nb_col_seq, function(nb_col) {
sampleBipartiteSBM( sampleBipartiteSBM(
nbNodes = c(nb_row, nb_col), blockProp = blockProp, connectParam = connectParam, nbNodes = c(nb_row, nb_col), blockProp = blockProp, connectParam = connectParam,
model = "bernoulli" model = model
)$rNetwork() )$rNetwork()
}) })
@ -38,6 +65,18 @@ lbm_matrices <- lapply(lbm_list, function(lbm) lbm$networkData)
lbm_row_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$row, 1, FUN = function(row) which(row == 1))) lbm_row_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$row, 1, FUN = function(row) which(row == 1)))
lbm_col_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$col, 1, FUN = function(col) which(col == 1))) lbm_col_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$col, 1, FUN = function(col) which(col == 1)))
library(here)
results_dir <- here("results", "increasing_size")
save_path <- here(results_dir, paste0("sbm_incr_", model, "_from_", min_nb_col, "_to_", max_nb_col, ".Rds"))
print(paste0("Final results will be saved to ", save_path))
# (epoch <- as.integer(Sys.time()))
# temp_dir <- here(results_dir, paste0(min_nb_col, "_to_", max_nb_col, "tmp", epoch))
# print(paste0("Temp saved to ", temp_dir))
library(parallelly) library(parallelly)
library(future) library(future)
library(future.apply) library(future.apply)
@ -45,11 +84,11 @@ library(future.callr)
plan(tweak("callr", workers = 64)) plan(tweak("callr", workers = 64))
lbm_res <- future_lapply(lbm_matrices, function(mat) { lbm_res <- future_lapply(seq_along(lbm_matrices), function(mat_idx) {
start_time <- Sys.time() start_time <- Sys.time()
fit <- estimateBipartiteSBM(netMat = mat, estimOptions = list(plot = 0)) fit <- estimateBipartiteSBM(netMat = lbm_matrices[[mat_idx]], estimOptions = list(plot = 0))
stop_time <- Sys.time() stop_time <- Sys.time()
return(list(fit = fit, time = stop_time - start_time)) return(out_list)
}, future.seed = TRUE) }, future.seed = TRUE)
lbm_fits <- lapply(lbm_res, function(lbm) lbm$fit) lbm_fits <- lapply(lbm_res, function(lbm) lbm$fit)
@ -60,9 +99,14 @@ lbm_fit_col <- lapply(lbm_fits, function(lbm) unonehot(lbm$indMemberships$col))
library(aricode) library(aricode)
sapply(seq_along((lbm_matrices)), function(idx) { ari_row <- sapply(seq_along((lbm_matrices)), function(idx) {
c( ARI(lbm_row_memberships[[idx]], lbm_fit_row[[idx]])
"row" = ARI(lbm_row_memberships[[idx]], lbm_fit_row[[idx]]),
"col" = ARI(lbm_col_memberships[[idx]], lbm_fit_col[[idx]])
)
}) })
ari_col <- sapply(seq_along((lbm_matrices)), function(idx) {
ARI(lbm_col_memberships[[idx]], lbm_fit_col[[idx]])
})
out_df <- data.frame(n1 = nb_row, n2 = nb_col_seq, model = model, time = lbm_times, ari_row = ari_row, ari_col = ari_col)
saveRDS(out_df, save_path)