117 lines
3.6 KiB
R
117 lines
3.6 KiB
R
library(sbm)
|
|
|
|
# ---- Command-line arguments --------------------------------------------------
# Accepted trailing arguments:
#   - none          : default min (50) and max (5000) are used
#   - one argument  : treated as max, min falls back to its default
#   - two arguments : treated as min and max, in that order
#   - more than two : error
args <- commandArgs(trailingOnly = TRUE)

if (length(args) > 2L) {
  stop("Too many arguments provided")
}

if (length(args) == 2L) {
  print("Two arguments were provided, will be treated as min and max")
  min_arg <- args[1]
  max_arg <- args[2]
} else {
  if (length(args) == 1L) {
    print("One argument was provided, will be treated as max")
  } else {
    # Previously reported as "One argument was provided" even with no argument.
    print("No argument was provided, default min and max will be used")
  }
  max_arg <- args
  min_arg <- NA
}

#' Turn one raw command-line value into a strictly positive integer.
#'
#' @param arg Raw argument: a length-one character/NA, or `character(0)` when
#'   the argument was not supplied.
#' @param default Integer used when `arg` is missing, not coercible to an
#'   integer, or not strictly positive.
#' @param label Either "min" or "max"; only used in the printed message.
#' @return `as.integer(arg)` when it is a positive integer, `default` otherwise.
parse_nb_col <- function(arg, default, label) {
  # The coercion warning is redundant with the fallback message printed below.
  value <- if (length(arg) == 1L) {
    suppressWarnings(as.integer(arg))
  } else {
    NA_integer_
  }
  if (is.na(value) || value <= 0L) {
    print(paste0(
      "No or incorrect argument was passed setting ", label,
      " to default value : ", default
    ))
    return(default)
  }
  print(paste0("Setting to ", label, " provided value : ", value))
  value
}

max_nb_col <- parse_nb_col(max_arg, default = 5000L, label = "max")
min_nb_col <- parse_nb_col(min_arg, default = 50L, label = "min")

# max must be strictly greater than min.
if (max_nb_col <= min_nb_col) {
  stop("The range between min and max should be positive and larger than 0")
}
|
|
|
|
# ---- Simulation settings -----------------------------------------------------
# Fixed seed so the random draws that follow are reproducible.
set.seed(1234)

# Model name passed to the sampler; also reused in the output file name and
# in the result data frame.
model <- "bernoulli"

# First entry of `nbNodes` for every simulated network (held constant).
nb_row <- 50

# Two vectors of block proportions (2 groups and 3 groups).
# Presumably row groups first, then column groups, matching the
# c(nb_row, nb_col) order used for `nbNodes` - confirm against the sbm docs.
blockProp <- list(
  c(0.25, 0.75),
  c(0.1, 0.4, 0.5)
)

# 2 x 3 matrix of connection parameters (one row per group of the first
# proportion vector, one column per group of the second).
# NOTE(review): matrix() fills column by column, so the resulting rows are
# (0.9, 0.1, 0.2) and (0.5, 0.3, 0.05) - confirm `byrow = TRUE` was not
# intended.
connectParam <- list(
  mean = matrix(
    c(0.9, 0.5, 0.1, 0.3, 0.2, 0.05),
    nrow = 2L, ncol = 3L
  )
)
|
|
|
|
# Column counts to simulate: from min_nb_col up to max_nb_col in steps of 50.
nb_col_seq <- seq(from = min_nb_col, to = max_nb_col, by = 50)

# One simulated bipartite network per column count; the row count stays at
# nb_row and all other parameters are shared.
lbm_list <- lapply(nb_col_seq, function(nb_col) {
  sampler <- sampleBipartiteSBM(
    nbNodes = c(nb_row, nb_col),
    blockProp = blockProp,
    connectParam = connectParam,
    model = model
  )
  sampler$rNetwork()
})
|
|
|
|
#' Convert a one-hot membership matrix into a vector of group indices.
#'
#' @param mat Matrix with one row per node and one column per group, where a
#'   1 marks the group of the node.
#' @return For each row, the column position(s) holding a 1. This is an
#'   integer vector when every row contains exactly one 1; `apply()` returns
#'   a list when rows contain differing numbers of 1s.
unonehot <- function(mat) {
  is_member <- mat == 1
  apply(is_member, MARGIN = 1, FUN = which)
}
|
|
|
|
# Network matrices of the simulated networks, in the same order as lbm_list.
lbm_matrices <- lapply(lbm_list, function(lbm) lbm$networkData)

# Row and column memberships of the simulated networks as group indices.
# Uses the shared `unonehot()` helper instead of repeating its apply() call,
# so simulated and fitted memberships are decoded the same way.
lbm_row_memberships <- lapply(
  lbm_list,
  function(lbm) unonehot(lbm$indMemberships$row)
)
lbm_col_memberships <- lapply(
  lbm_list,
  function(lbm) unonehot(lbm$indMemberships$col)
)
|
|
|
|
library(here)
|
|
# ---- Output location ---------------------------------------------------------
# Results go to <project root>/results/increasing_size.
results_dir <- here("results", "increasing_size")

# `results_dir` is already rooted by here(), so the file name is appended with
# file.path() rather than passing an absolute path back into here().
save_path <- file.path(
  results_dir,
  paste0(
    "sbm_incr_", model, "_from_", min_nb_col, "_to_", max_nb_col, ".Rds"
  )
)

# Create the output directory (and any missing parents) on first use.
if (!dir.exists(results_dir)) {
  dir.create(results_dir, recursive = TRUE)
}

print(paste0("Final results will be saved to ", save_path))
|
|
|
|
# (epoch <- as.integer(Sys.time()))
|
|
|
|
# temp_dir <- here(results_dir, paste0(min_nb_col, "_to_", max_nb_col, "tmp", epoch))
|
|
|
|
# print(paste0("Temp saved to ", temp_dir))
|
|
|
|
library(parallelly)
|
|
library(future)
|
|
library(future.apply)
|
|
library(future.callr)
|
|
|
|
# ---- Parallel estimation -----------------------------------------------------
# Use at most 64 callr workers, capped at the number of cores available to
# this process so that smaller machines are not oversubscribed (which would
# also distort the timings recorded below).
plan(tweak("callr", workers = min(64L, parallelly::availableCores())))

# Fit one bipartite SBM per simulated matrix and record the elapsed time of
# each fit. Iterating over the matrices themselves (rather than over indices
# into `lbm_matrices`) avoids exporting the whole list to every worker.
# Each element of the result is list(fit = <fit>, time = <difftime in secs>).
lbm_res <- future_lapply(lbm_matrices, function(net_mat) {
  start_time <- Sys.time()
  fit <- estimateBipartiteSBM(
    netMat = net_mat,
    estimOptions = list(plot = 0)
  )
  stop_time <- Sys.time()
  list(fit = fit, time = difftime(stop_time, start_time, units = "secs"))
}, future.seed = TRUE)
|
|
|
|
# Split the parallel results into fitted models and elapsed times.
lbm_fits <- lapply(lbm_res, function(res) res$fit)

# Elapsed fit time in seconds as a plain numeric vector. vapply() fixes the
# return type, where sapply() would depend on the shape of its input.
lbm_times <- vapply(
  lbm_res,
  function(res) as.numeric(res$time, units = "secs"),
  numeric(1)
)

# Fitted row and column memberships as group indices.
lbm_fit_row <- lapply(lbm_fits, function(fit) unonehot(fit$indMemberships$row))
lbm_fit_col <- lapply(lbm_fits, function(fit) unonehot(fit$indMemberships$col))
|
|
|
|
library(aricode)
|
|
|
|
# Adjusted Rand index between simulated and fitted memberships, one value per
# simulated network. vapply() enforces a single numeric value per network.
ari_row <- vapply(seq_along(lbm_matrices), function(idx) {
  ARI(lbm_row_memberships[[idx]], lbm_fit_row[[idx]])
}, numeric(1))

ari_col <- vapply(seq_along(lbm_matrices), function(idx) {
  ARI(lbm_col_memberships[[idx]], lbm_fit_col[[idx]])
}, numeric(1))
|
|
|
|
# ---- Collect and save results ------------------------------------------------
# One row per simulated network: its dimensions, the model name, the fit
# time, and the row/column ARI values.
out_df <- data.frame(
  n1 = nb_row,
  n2 = nb_col_seq,
  model = model,
  time = lbm_times,
  ari_row = ari_row,
  ari_col = ari_col
)

saveRDS(out_df, file = save_path)
|