diff --git a/increasing_size_test.R b/increasing_size_test.R new file mode 100644 index 0000000..4befcb5 --- /dev/null +++ b/increasing_size_test.R @@ -0,0 +1,61 @@ +library(sbm) + +set.seed(1234) + +nb_row <- 50 + + +blockProp <- list( + c(0.25, 0.75), + c(0.1, 0.4, 0.5) +) + +connectParam <- list(mean = matrix(c( + 0.9, 0.5, 0.1, + 0.3, 0.2, 0.05 +), nrow = 2L, ncol = 3L)) + +nb_col_seq <- seq(50, 5000, by = 50) + +lbm_list <- lapply(nb_col_seq, function(nb_col) { + sampleBipartiteSBM( + nbNodes = c(nb_row, nb_col), blockProp = blockProp, connectParam = connectParam, + model = "bernoulli" + )$rNetwork() +}) + +unonehot <- function(mat) { + apply(mat, 1, FUN = function(row) which(row == 1)) +} + +lbm_matrices <- lapply(lbm_list, function(lbm) lbm$networkData) +lbm_row_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$row, 1, FUN = function(row) which(row == 1))) +lbm_col_memberships <- lapply(lbm_list, function(lbm) apply(lbm$indMemberships$col, 1, FUN = function(col) which(col == 1))) + +library(future) +library(future.apply) +library(future.callr) + +plan(tweak("callr", workers = availableWorkers(omit = 1L))) + +lbm_res <- future_lapply(lbm_matrices, function(mat) { + start_time <- Sys.time() + fit <- estimateBipartiteSBM(netMat = mat, estimOptions = list(plot = 0)) + stop_time <- Sys.time() + return(list(fit = fit, time = stop_time - start_time)) +}, future.seed = TRUE) + +lbm_fits <- lapply(lbm_res, function(lbm) lbm$fit) +lbm_times <- sapply(lbm_res, function(lbm) lbm$time) + +lbm_fit_row <- lapply(lbm_fits, function(lbm) unonehot(lbm$indMemberships$row)) +lbm_fit_col <- lapply(lbm_fits, function(lbm) unonehot(lbm$indMemberships$col)) + +library(aricode) + +sapply(seq_along((lbm_matrices)), function(idx) { + c( + "row" = ARI(lbm_row_memberships[[idx]], lbm_fit_row[[idx]]), + "col" = ARI(lbm_col_memberships[[idx]], lbm_fit_col[[idx]]) + ) +})