diff --git a/code/applications/sub-dore/baldock/01_baldock_clusterize.R b/code/applications/sub-dore/baldock/01_baldock_clusterize.R index a02c6d1..af6f377 100644 --- a/code/applications/sub-dore/baldock/01_baldock_clusterize.R +++ b/code/applications/sub-dore/baldock/01_baldock_clusterize.R @@ -74,7 +74,7 @@ names_aggreg_networks <- sapply( incidence_matrices <- lapply( seq_ids_network_aggreg, function(m) { - current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>% + current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] |> mutate( plantaggreg = paste(plantorder, plantfamily, plantgenus, plantspecies, diff --git a/code/for_report/.Rhistory b/code/for_report/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/code/results/simulations/identifiability/1728998446.rds b/code/results/simulations/identifiability/1728998446.rds new file mode 100644 index 0000000..600bae2 Binary files /dev/null and b/code/results/simulations/identifiability/1728998446.rds differ diff --git a/code/results/simulations/identifiability/1728999716.rds b/code/results/simulations/identifiability/1728999716.rds new file mode 100644 index 0000000..b0e826f Binary files /dev/null and b/code/results/simulations/identifiability/1728999716.rds differ diff --git a/code/results/simulations/identifiability/1729000112.rds b/code/results/simulations/identifiability/1729000112.rds new file mode 100644 index 0000000..d99534b Binary files /dev/null and b/code/results/simulations/identifiability/1729000112.rds differ diff --git a/code/results/simulations/identifiability/1729001734.rds b/code/results/simulations/identifiability/1729001734.rds new file mode 100644 index 0000000..e16259f Binary files /dev/null and b/code/results/simulations/identifiability/1729001734.rds differ diff --git a/code/results/simulations/identifiability/1729002539.rds b/code/results/simulations/identifiability/1729002539.rds new file mode 100644 index 0000000..0cf0ddd Binary files /dev/null and b/code/results/simulations/identifiability/1729002539.rds differ diff --git a/code/simulations/simulations_NA_robustness.R b/code/simulations/simulations_NA_robustness.R index 1e618be..c9c49bc 100644 --- a/code/simulations/simulations_NA_robustness.R +++ b/code/simulations/simulations_NA_robustness.R @@ -4,33 +4,11 @@ necessary_packages <- c( options(future.globals.maxSize = Inf) -if (!all((necessary_packages %in% installed.packages()))) { - install.packages(necessary_packages[-length(necessary_packages)]) - remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2") -} library("pROC") library("colSBM") -future::plan(future::multisession) - -# Arguments -arg <- commandArgs(trailingOnly = TRUE) - sampling <- "uniform" struct <- "modular" - -print(arg) - -if (length(arg) == 0L) { - message("No arguments provided, using default.") -} else { - if ("--struct" %in% arg) { - struct <- arg[(which(arg == "--struct") + 1L)] - } else { - message("No structure provided, defaulting to modular.") - } -} - # Arguments checks allowed_struct <- c("modular", "nested") @@ -52,6 +30,8 @@ nc2 <- 120 pir <- c(0.5, 0.3, 0.2) pic <- c(0.5, 0.3, 0.2) +struct <- "modular" + alpha <- switch(struct, "modular" = { alpha <- matrix(c( @@ -167,6 +147,13 @@ message( "Starting NA robustness simulation with ", sampling, " sampling and ", struct, " structure." ) + +library("future") +library("future.callr") +plan( + tweak("callr", workers = floor(parallelly::availableCores(omit = 1L) / 3L)), + tweak("callr", workers = 3L) +) result_list <- future.apply::future_lapply( seq_len(nrow(conditions)), function(current) { @@ -187,44 +174,7 @@ result_list <- future.apply::future_lapply( ) }) - NAs_selected_index <- switch(sampling, - "uniform" = { - seq_len(length(bipartite_collection_incidence[[1]])) - }, - "row" = { - row_cluster_selected <- sample.int(n = length(pir), size = 1) - row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected) - col_nodes_selected <- seq(1, nc2) - - NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - - # Computes the index as a single number, R stores matrices by column - (NAs_selected_index_exp[["col"]] - 1) * - nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] - }, - "col" = { - row_nodes_selected <- seq(1, nr2) - col_cluster_selected <- sample.int(n = length(pic), size = 1) - col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected) - - NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - - # Computes the index as a single number, R stores matrices by column - (NAs_selected_index_exp[["col"]] - 1) * - nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] - }, - "rowcol" = { - row_cluster_selected <- sample.int(n = length(pir), size = 1) - row_nodes_selected <- which(Z[[1]][[1]] == row_cluster_selected) - col_cluster_selected <- sample.int(n = length(pic), size = 1) - col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected) - NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - - # Computes the index as a single number, R stores matrices by column - (NAs_selected_index_exp[["col"]] - 1) * - nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] - } - ) + NAs_selected_index <- seq_len(length(bipartite_collection_incidence[[1]])) NAs_index <- sample(NAs_selected_index, size = floor(prop_NAs * length(NAs_selected_index))) @@ -239,7 +189,8 @@ result_list <- future.apply::future_lapply( netlist = bipartite_collection_incidence, colsbm_model = model, nb_run = 1, global_opts = list( - nb_cores = parallel::detectCores() - 1, verbosity = 0 + nb_cores = parallel::detectCores() - 1, verbosity = 0, + backend = "future" ) ) stop_time <- Sys.time() @@ -248,7 +199,8 @@ result_list <- future.apply::future_lapply( netlist = bipartite_collection_incidence[[1]], colsbm_model = "iid", nb_run = 1, global_opts = list( - nb_cores = parallel::detectCores() - 1, verbosity = 0 + nb_cores = parallel::detectCores() - 1, verbosity = 0, + backend = "future" ) ) diff --git a/code/simulations/simulations_identifiability.R b/code/simulations/simulations_identifiability.R new file mode 100644 index 0000000..aac715d --- /dev/null +++ b/code/simulations/simulations_identifiability.R @@ -0,0 +1,109 @@ +necessary_packages <- c( + "remotes", "tictoc", "combinat", "parallel", "colSBM", "future.apply", "here", "progressr" +) + +options(future.globals.maxSize = Inf) + +if (!all((necessary_packages %in% installed.packages()))) { + install.packages(necessary_packages[-length(necessary_packages)]) + remotes::install_github(repo = "Chabert-Liddell/colSBM", force = TRUE) +} +library(colSBM) +library(here) +library(progressr) +handlers(global = TRUE) +future::plan(future::multisession()) +set.seed(1234) + +# Network param +nr <- 120 +nc <- 120 +M <- 2 + +distribution <- "bernoulli" +Q <- c(3, 3) + +alpha <- matrix( + c( + 0.2, 0.1, 0, + 0.4, 0.9, 0.6, + 0, 0.7, 0.8 + ), + nrow = 3, ncol = 3 +) + +pi1 <- c(0.5, 0.5, 0) +pi2 <- c(0, 0.5, 0.5) + +rho1 <- c(0.5, 0.5, 0) +rho2 <- c(0, 0.5, 0.5) + + + +repetitions <- seq.int(10L) +with_progress({ + p <- progressor(along = repetitions) + results <- future.apply::future_lapply(repetitions, function(repli) { + p(amount = 0) + networks_list <- list( + colSBM:::generate_bipartite_network( + nr = nr, nc = nc, alpha = alpha, pi = pi1, rho = rho1 + ), + colSBM:::generate_bipartite_network( + nr = nr, nc = nc, alpha = alpha, pi = pi2, rho = rho2 + ) + ) + fit_list <- lapply(1:3, function(idx) { + estimate_colBiSBM(netlist = networks_list, net_id = c("top", "bot"), colsbm_model = "pirho", distribution = "bernoulli", nb_run = 1L, global_opts = list(verbosity = 2L, backend = "no_mc")) + }) + names(fit_list) <- repli + p(message = paste0("Step ", repli, " out of ", length(repetitions))) + fit_list + }) +}) +save_dir <- here::here("code", "results", "simulations", "identifiability") +# The file path with with a timestamp as epoch +save_path <- file.path(save_dir, paste0(as.integer(Sys.time()), ".rds")) +if (!dir.exists(save_dir)) { + dir.create(save_dir, recursive = TRUE) +} + +saveRDS(results, save_path) + +# Plots +library(ggplot2) +library(patchwork) + +# True values +p_alpha <- alpha |> + t() |> + reshape2::melt() |> + ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = value)) + + ggplot2::geom_tile() + + ggplot2::scale_fill_gradient2("alpha", + low = "white", + high = "red", + limits = c( + 0, + ifelse(distribution == "bernoulli", 1, max(alpha)) + ) + ) + + ggplot2::geom_hline(yintercept = seq(Q[1]) + .5) + + ggplot2::geom_vline(xintercept = seq(Q[2]) + .5) + + ggplot2::scale_x_continuous(breaks = seq(Q[2])) + + ggplot2::scale_y_reverse(breaks = seq(Q[1])) + + ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) + + ggplot2::xlab("") + + ggplot2::ylab("") + + ggplot2::coord_fixed(expand = FALSE) + + ggplot2::geom_text(ggplot2::aes(label = round(value, 2)), color = "black") + + ggtitle("True alpha") + +plot_list <- lapply(results, function(fit_list) { + lapply(fit_list, function(fit) { + plot(fit$best_fit, type = "meso", values = TRUE) + + ggtitle(names(fit_list)) + }) +}) + +wrap_plots(plotlist = append(list(p_alpha), plot_list), nrow = 2) diff --git a/code/simulations/simulations_inference_bernoulli.R b/code/simulations/simulations_inference_bernoulli.R index 2851bec..5b3c69d 100644 --- a/code/simulations/simulations_inference_bernoulli.R +++ b/code/simulations/simulations_inference_bernoulli.R @@ -6,7 +6,7 @@ options(future.globals.maxSize = Inf) if (!all((necessary_packages %in% installed.packages()))) { install.packages(necessary_packages[-length(necessary_packages)]) - remotes::install_github(repo = "Chabert-Liddell/colSBM@merge-bipartite-2") + remotes::install_github(repo = "Chabert-Liddell/colSBM") } # Sourcing all necessary files