diff --git a/README.md b/README.md index a1a74ba..ff512cf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#  MIA Stage 2024 +# MIA Stage 2024 Ce dépôt accueille tout le travaille que je vais réaliser pendant mon stage au MIA Paris-Saclay en 2024. diff --git a/code/analysis/analyze_NA_robustness.Rmd b/code/analysis/analyze_NA_robustness.Rmd index b35c277..e6b4323 100644 --- a/code/analysis/analyze_NA_robustness.Rmd +++ b/code/analysis/analyze_NA_robustness.Rmd @@ -7,7 +7,7 @@ output: --- ```{r knitropts, echo = FALSE} -knitr::opts_chunk$set(fig.width=12L, fig.height=10L) +knitr::opts_chunk$set(fig.width = 12L, fig.height = 10L) ``` ```{r imports, echo = FALSE, include=FALSE} @@ -19,7 +19,7 @@ loadNamespace(package = "patchwork") ``` ```{r data, echo = FALSE} -#  Loading data +# Loading data data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness") files_list <- list.files(data_folder, pattern = "^NA_robustness_22-04-2024_17") ``` @@ -31,13 +31,13 @@ prepare_dataframes_auc_ari <- function(data) { sampling <- unique(data[["sampling"]]) struct <- unique(data[["struct"]]) - #  Averaging over repetitions + # Averaging over repetitions averaged_data <- data %>% group_by(prop_NAs, model) %>% select(-c(repetition, sampling, struct)) %>% summarise_all(list(mean = mean, sd = sd)) - #  Preparing auc_data + # Preparing auc_data auc_data <- averaged_data %>% select(c(prop_NAs, model) | contains("auc_")) %>% rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) diff --git a/code/analysis/analyze_inference_bernoulli.qmd b/code/analysis/analyze_inference_bernoulli.qmd index 2684c46..21ccf36 100644 --- a/code/analysis/analyze_inference_bernoulli.qmd +++ b/code/analysis/analyze_inference_bernoulli.qmd @@ -24,7 +24,7 @@ loadNamespace("kableExtra") ``` ```{r data, echo = FALSE} -#  Loading data +# Loading data data_folder <- file.path(here(), "code", "results", "simulations", "inference", "bernoulli") # files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024") data <- readRDS(file.path(data_folder, "bernoulli_inference_06-05-2024_17-21-48_1-972.Rds")) @@ -62,7 +62,7 @@ print_data <- av_data %>% group_by(epsilon_alpha) %>% round(2) %>% mutate( - #  Model comparison + # Model comparison pirho_over_sep = pirho_over_sep_mean, pirho_over_iid = pirho_over_iid_mean, pirho_over_pi = pirho_over_pi_mean, @@ -77,7 +77,7 @@ print_data <- av_data %>% pirho_Q2_over_4 = pirho_Q2_over_4_mean, - #  Grouping accuracy + # Grouping accuracy pirho_mean_row_ARI = paste(pirho_mean_row_ARI_mean, pirho_mean_row_ARI_sd, sep = "\\pm" @@ -132,7 +132,7 @@ ft_1 <- flextable(print_data) |> opts_html = list( scroll = list(freeze_first_column = TRUE) ) - ) |> + ) |> compose(j = "pirho_mean_row_ARI", value = as_paragraph(as_equation(pirho_mean_row_ARI))) ft_1 <- compose(ft_1, j = "pirho_mean_col_ARI", value = as_paragraph(as_equation(pirho_mean_col_ARI))) ft_1 <- compose(ft_1, j = "pirho_double_row_ARI", value = as_paragraph(as_equation(pirho_double_row_ARI))) @@ -143,7 +143,7 @@ ft_1 <- ft_1 |> mk_par( as_equation(col_keys, width = .1, height = .2) ) ) -theme_zebra(ft_1) |> +theme_zebra(ft_1) |> hline(i = 1, border = fp_border_default(), part = "header") |> vline(j = 1, border = fp_border_default(), part = "all") |> bg(j = 1, bg = "#DDDDDD", part = "all") |> diff --git a/code/analysis/investigating/invest_clust_perf.R b/code/analysis/investigating/invest_clust_perf.R index 649291c..fce0042 100644 --- a/code/analysis/investigating/invest_clust_perf.R +++ b/code/analysis/investigating/invest_clust_perf.R @@ -6,9 +6,9 @@ set.seed(1234, "L'Ecuyer-CMRG") data(dorebipartite) str(dorebipartite) -nb_cores = parallelly::availableCores() -concurrent_models <- min(length(seq(1,4)), nb_cores%/%2) -per_model <- (nb_cores - concurrent_models) %/% length(seq(1,4)) +nb_cores <- parallelly::availableCores() +concurrent_models <- min(length(seq(1, 4)), nb_cores %/% 2) +per_model <- (nb_cores - concurrent_models) %/% length(seq(1, 4)) mb1 <- microbenchmark( "fancy computation" = { @@ -44,4 +44,4 @@ mb2 <- microbenchmark( times = 5L ) -# Bilan : plus intéressant de bourriner \ No newline at end of file +# Bilan : plus intéressant de bourriner diff --git a/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd b/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd index f8ed5a1..847c7c1 100644 --- a/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd +++ b/code/analysis/investigating/invest_inference_bernoulli_double_ARI.Rmd @@ -44,7 +44,7 @@ file_list <- list.files(data_folder) file <- file_list[[1]] data <- readRDS(file.path(data_folder, file)) -#  Extracting data to netlist and Z +# Extracting data to netlist and Z netlist_memb <- data[["netlist"]] netlist <- lapply(netlist_memb, function(full) full[["incidence_matrix"]]) nr <- nrow(netlist[[1]]) @@ -58,8 +58,6 @@ joined_col_memberships <- unlist(col_blockmemberships) row_taus <- lapply(row_blockmemberships, function(Z1_m) colSBM:::.one_hot(Z1_m, 4)) col_taus <- lapply(col_blockmemberships, function(Z2_m) colSBM:::.one_hot(Z2_m, 4)) - - ``` ```{r parameters, echo = FALSE} @@ -110,7 +108,7 @@ ARI(joined_col_memberships, good_joined_col) ```{r, echo = FALSE} wrap_plots( - plot_alpha(data[["alpha"]])+ labs(caption = "Vrai alpha"), + plot_alpha(data[["alpha"]]) + labs(caption = "Vrai alpha"), plot(good_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,4)"), plot(bad_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,5)") ) @@ -143,18 +141,20 @@ Ici on clone le modèle et on lui donne les bons paramètres pour voir s'il fait mieux que celui trouvé avant. ```{r testing_true_params, echo = FALSE} -# Un clone +# Un clone good_clone <- good_model$clone() -# Les vrais paramètres +# Les vrais paramètres alpha <- data[["alpha"]] taus <- lapply(seq_along(row_taus), function(m) { - list(row = row_taus[[m]], - col = col_taus[[m]]) + list( + row = row_taus[[m]], + col = col_taus[[m]] + ) }) # Pi params pi1 <- as.vector(data[["pi1"]]) -pi1[1] <- pi1[1] + 1e-9 +pi1[1] <- pi1[1] + 1e-9 rho1 <- rep(0.25, 4) pi2 <- rep(0.25, 4) rho2 <- as.vector(data[["rho2"]]) @@ -165,15 +165,17 @@ good_clone[["tau"]] <- taus good_clone[["pi"]] <- pi good_clone[["alpha"]] <- alpha -result_BICL <- c(bad_BICL = bad_model$compute_BICL(), -good_model_BICL = good_model$compute_BICL(), -good_clone_BICL = good_clone$compute_BICL()) +result_BICL <- c( + bad_BICL = bad_model$compute_BICL(), + good_model_BICL = good_model$compute_BICL(), + good_clone_BICL = good_clone$compute_BICL() +) print(result_BICL) ``` On vient mettre à la place du 4,4 le modèle avec les vrais paramètres. Et on espère voir la bonne information se diffuser. ```{r adjusting, echo = FALSE} -fitted_bisbmpop_pirho$model_list[[4,4]] <- good_clone -fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4,4), depth = 1L, nb_pass = 1L) +fitted_bisbmpop_pirho$model_list[[4, 4]] <- good_clone +fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4, 4), depth = 1L, nb_pass = 1L) ``` \ No newline at end of file diff --git a/code/analysis/investigating/invest_split_clust.R b/code/analysis/investigating/invest_split_clust.R index 1cfd67a..f8c4728 100644 --- a/code/analysis/investigating/invest_split_clust.R +++ b/code/analysis/investigating/invest_split_clust.R @@ -2,7 +2,7 @@ library(here) inv_folder <- file.path(here(), "code", "results", "investigating", "split_clust") -for(file in list.files(inv_folder)) { +for (file in list.files(inv_folder)) { load(file.path(inv_folder, file)) } @@ -16,10 +16,9 @@ noname_X <- as.matrix(noname_X) colSBM:::split_clust(noname_X, Z, Q = 1, is_bipartite = TRUE) -alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1 -)[[1]] +alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1)[[1]] colSBM:::split_clust(alt_X, Z, Q = 1, is_bipartite = TRUE) -# La solution était que X était regardé comme une table et unique marche -# différemment \ No newline at end of file +# La solution était que X était regardé comme une table et unique marche +# différemment diff --git a/code/applications/base_analysis.qmd b/code/applications/base_analysis.qmd new file mode 100644 index 0000000..85e3a28 --- /dev/null +++ b/code/applications/base_analysis.qmd @@ -0,0 +1,133 @@ +### Modèle ${{model}}$ + +```{r} +library(here) + +library(colSBM) + +library(dplyr) +library(tidyr) + +library(plotly) +library(ggplot2) +library(ggforce) +library(ggrepel) +library(latex2exp) +``` +```{r} +list_collection <- readRDS("{{clustering}}") +unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection)) + +graph_size_df <- build_graph_size_dataframe(unlisted_best_partition) +summarized_graph_size_df <- graph_size_df %>% + group_by(collection_id) %>% + select(-net_id) %>% + summarise( + M = n(), + nr_mean = mean(nr), + nr_sd = ifelse(length(nr) > 1, sd(nr), 0), + nc_mean = mean(nc), + nc_sd = ifelse(length(nc) > 1, sd(nc), 0), + Qr = first(Qr), + Qc = first(Qc) + ) +``` + +:::{.panel-tabset} +#### Analyse des tailles de cluster + +```{r} +filtered_summarized_graph_size_df <- summarized_graph_size_df +ggplot(filtered_summarized_graph_size_df) + + aes(x = Qc, y = Qr, color = collection_id, label = M) + + geom_point() + # , position = position_jitter(h = 0.15, w = 0.05) + geom_text_repel() + + xlab(TeX("$Q_c$")) + + ylab(TeX("$Q_r$")) + + coord_fixed() + +plot_ly(x = filtered_summarized_graph_size_df$Qc, + y = filtered_summarized_graph_size_df$Qr, + z = filtered_summarized_graph_size_df$M, + type = "scatter3d", mode = "markers", + color = filtered_summarized_graph_size_df$collection_id) %>% + layout(scene = list(xaxis = list(title = "Q_c"), + yaxis = list(title = "Q_r"))) +``` + +#### Analyse des tailles de réseaux + +```{r} +ggplot(filtered_summarized_graph_size_df) + + aes(x = nc_mean, y = nr_mean, color = collection_id) + + geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) + geom_ellipse(aes( + x0 = nc_mean, y0 = nr_mean, + a = 0.5 * nc_sd, + b = 0.5 * nr_sd, + fill = collection_id, angle = 0 + ), alpha = 0.1) + + geom_text_repel(aes(label = collection_id)) + + xlab(TeX("$n_c$")) + + ylab(TeX("$n_r$")) + + coord_fixed() + + ggtitle("Distribution des tailles moyennes et écart-types") + +max_nr <- 200 +max_nc <- 200 + +ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) + + aes(x = nc_mean, y = nr_mean, color = collection_id) + + geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) + geom_ellipse(aes( + x0 = nc_mean, y0 = nr_mean, + a = 0.5 * nc_sd, + b = 0.5 * nr_sd, + fill = collection_id, angle = 0 + ), alpha = 0.1) + + geom_text_repel(aes(label = collection_id)) + + xlab(TeX("$n_c$")) + + ylab(TeX("$n_r$")) + + coord_fixed() + + ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$"))) + +filtered_graph_size <- graph_size_df +ggplot(filtered_graph_size) + + aes(x = collection_id, y = nr, fill = collection_id) + + geom_boxplot() +ggplot(filtered_graph_size) + + aes(x = collection_id, y = nc, fill = collection_id) + + geom_boxplot() + +ggplot(filtered_graph_size) + + aes(x = collection_id, y = nr + nc, fill = collection_id) + + geom_boxplot() + +ggplot(filtered_graph_size) + + aes(x = collection_id, y = nr * nc, fill = collection_id) + + geom_boxplot() +``` + +::: + +#### Structure des collections + +```{r} +#| output: asis +for (i in seq_len(length(unlisted_best_partition))) { + cat("\n##### Structure collection ", i, "\n") + current_col <- unlisted_best_partition[[i]]$clone() + current_col$net_id <- current_col$net_id %>% + stringr::str_trunc(width = 20L) + try({ + p <- plot( + current_col, + type = "meso", + values = FALSE, mixture = TRUE + ) + + ggtitle(paste0("Structure\ncollection ", i)) + print(p) + }) +} + +``` \ No newline at end of file diff --git a/code/applications/dore/01_dore_clusterize.R b/code/applications/dore/01_dore_clusterize.R index 83d808b..2fcd5f7 100644 --- a/code/applications/dore/01_dore_clusterize.R +++ b/code/applications/dore/01_dore_clusterize.R @@ -24,7 +24,7 @@ if (length(arg) == 0L) { } } -#  Arguments checks +# Arguments checks allowed_model <- c("iid", "pi", "rho", "pirho") stopifnot( "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model), diff --git a/code/applications/dore/02_dore_analysis.html b/code/applications/dore/02_dore_analysis.html new file mode 100644 index 0000000..69a09ec --- /dev/null +++ b/code/applications/dore/02_dore_analysis.html @@ -0,0 +1,1731 @@ + + + + + + + + + +Clustering avec colSBM des données + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

Clustering avec colSBM des données

+
+ + + +
+ + + + +
+ + + +
+ + +
+

Analyse

+
+

Analyse par modèle

+
+ +
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19 ##### Structure collection 20 ##### Structure collection 21

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19 ##### Structure collection 20 ##### Structure collection 21 ##### Structure collection 22 ##### Structure collection 23 ##### Structure collection 24

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19 ##### Structure collection 20 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 21 ##### Structure collection 22

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 17 ##### Structure collection 18

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19 ##### Structure collection 20 ##### Structure collection 21

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 Error in map(.x, .f, …) : ℹ In index: 1. Caused by error in names(object) <- nm: ! ‘names’ attribute [1] must be the same length as the vector [0] ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14

+
+
+
+
+
+ +
+
+
+
+
+
+

+
+
+
+
+
+ +
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+

Structure des collections

+
+
Structure collection 1
+

##### Structure collection 2 ##### Structure collection 3 ##### Structure collection 4 ##### Structure collection 5 ##### Structure collection 6 ##### Structure collection 7 ##### Structure collection 8 ##### Structure collection 9 ##### Structure collection 10 ##### Structure collection 11 ##### Structure collection 12 ##### Structure collection 13 ##### Structure collection 14 ##### Structure collection 15 ##### Structure collection 16 ##### Structure collection 17 ##### Structure collection 18 ##### Structure collection 19 ##### Structure collection 20 ##### Structure collection 21 ##### Structure collection 22 ##### Structure collection 23 ##### Structure collection 24 ##### Structure collection 25 ##### Structure collection 26

+
+
+
+
+
+
+
+ +
+ + +
+ + + + + \ No newline at end of file diff --git a/code/applications/dore/02_dore_analysis.qmd b/code/applications/dore/02_dore_analysis.qmd index f22fe57..f592d50 100644 --- a/code/applications/dore/02_dore_analysis.qmd +++ b/code/applications/dore/02_dore_analysis.qmd @@ -1,52 +1,46 @@ --- +format: html +title: Clustering avec `colSBM` des données +execute: + echo: false + warning: false --- +# Analyse + ```{r} library(colSBM) library(here) +library(stringr) ``` ```{r} root_app_folder <- file.path(here(), "code", "applications") source(file.path(root_app_folder, "utils.R")) data_folder <- file.path(here(), "code", "results", "applications", "dore") -files_vec <- get_recent_files(data_folder) +files_vec <- get_recent_files(data_folder, n = 16) files_vec <- identify_models(files_vec) -list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds")) - -best_partition <- extract_best_bipartite_partition(list_collection) -length(unlist(best_partition)) -unlisted_bp <- unlist(best_partition) +list_clustering <- files_vec +# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds")) ``` -```{r} -library(stringr) +## Analyse par modèle -# string_split <- function(string,size) { -# unlist(str_extract_all(string, paste0('.{1,',size,'}'))) -# } +:::{.panel-tabset} -# custom_str_wrap <- function(str, width = 30) { -# if (nchar(str) > width) { -# return(paste(string_split(str, size = width), collapse = "\n")) -# } -# return(str) -# } +```{r write_tabs} +#| warning: false +#| output: asis -iid_BICL <- sum(sapply(unlisted_bp, function(model) model$BICL)) - -#  Shortening net names -for (idx in seq_len(length(unlisted_bp))) { - unlisted_bp[[idx]]$net_id <- sapply(unlisted_bp[[idx]]$net_id, function(id) str_trunc(id,20)) -} - -``` - -```{r} -for (idx in seq_len(length(unlisted_bp))) { - print(plot(unlisted_bp[[idx]], type = "meso", mixture = TRUE, values = TRUE)) +# Generate content for each model using knit_expand +for (clustering_idx in seq_len(length(list_clustering))) { + clustering <- list_clustering[clustering_idx] + model <- names(clustering) + expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model) + res <- knitr::knit_child(text = expanded_content, quiet = TRUE) + cat(res, sep = "\n") + cat("\n") } ``` -Analyser par LBM : -3, \ No newline at end of file +::: \ No newline at end of file diff --git a/code/applications/herbivores/02_herbivores_clustering.R b/code/applications/herbivores/02_herbivores_clustering.R index ffcbe19..9027e7e 100644 --- a/code/applications/herbivores/02_herbivores_clustering.R +++ b/code/applications/herbivores/02_herbivores_clustering.R @@ -28,7 +28,7 @@ if (length(arg) == 0L) { } } -#  Arguments checks +# Arguments checks allowed_model <- c("iid", "pi", "rho", "pirho") stopifnot( "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model) diff --git a/code/applications/herbivores/03_herbivores_analysis.html b/code/applications/herbivores/03_herbivores_analysis.html index a7ad569..83a039e 100644 --- a/code/applications/herbivores/03_herbivores_analysis.html +++ b/code/applications/herbivores/03_herbivores_analysis.html @@ -33,6 +33,15 @@ ul.task-list li input[type="checkbox"] { + + + + + + + + + @@ -62,60 +71,38 @@ ul.task-list li input[type="checkbox"] { -
-
-
here() starts at /home/polarolouis/Documents/Temporary Projects/mia-stage-2024
-
-
-

-Attachement du package : 'dplyr'
-
-
-
Les objets suivants sont masqués depuis 'package:stats':
+
- filter, lag
-
-
Les objets suivants sont masqués depuis 'package:base':
+
+
+
+

+
+
+
+ +
+
+
+
- intersect, setdiff, setequal, union
-
-
Warning in get_recent_files(data_folder): n = 4 is too large ! It should be at
-most 1
+
+
+
+

+
+
+

-
-
-
-
-

-
+
+
+

-
-
-
-
-

-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-

-
+
+

diff --git a/code/applications/herbivores/03_herbivores_analysis.qmd b/code/applications/herbivores/03_herbivores_analysis.qmd index 6858d82..09cf656 100644 --- a/code/applications/herbivores/03_herbivores_analysis.qmd +++ b/code/applications/herbivores/03_herbivores_analysis.qmd @@ -3,8 +3,18 @@ title: Analysis of herbivores data execute: echo: false freeze: auto + warning: false --- +```{r import_data} +root_app_folder <- file.path(here(), "code", "applications") +source(file.path(root_app_folder, "utils.R")) +data_folder <- file.path(here(), "code", "results", "applications", "herbivores") +files_vec <- get_recent_files(data_folder) +filepath <- files_vec[1] +list_collection <- readRDS(filepath) +``` + ```{r libs_and_data} library(here) @@ -12,21 +22,14 @@ library(colSBM) library(dplyr) library(tidyr) + +library(plotly) library(ggplot2) library(ggforce) -library(weird) - -root_app_folder <- file.path(here(), "code", "applications") -source(file.path(root_app_folder, "utils.R")) -data_folder <- file.path(here(), "code", "results", "applications", "herbivores") -files_vec <- get_recent_files(data_folder) -# files_vec <- identify_models(files_vec) +library(ggrepel) +library(latex2exp) ``` - -```{r} -filepath <- files_vec[1] -list_collection <- readRDS(filepath) - +```{r dataformatting} unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection)) graph_size_df <- build_graph_size_dataframe(unlisted_best_partition) @@ -41,39 +44,72 @@ summarized_graph_size_df <- graph_size_df %>% nc_sd = ifelse(length(nc) > 1, sd(nc), 0), Qr = first(Qr), Qc = first(Qc) -) + ) +``` +### Analyse des tailles de cluster +```{r collection_blocks_plot_size} +#| layout-ncol: 2 +#| plotly: true +filtered_summarized_graph_size_df <- summarized_graph_size_df +ggplot(filtered_summarized_graph_size_df) + + aes(x = Qc, y = Qr, color = collection_id, label = M) + + geom_point() + # , position = position_jitter(h = 0.15, w = 0.05) + geom_text_repel() + + xlab(TeX("$Q_c$")) + + ylab(TeX("$Q_r$")) + + coord_fixed() + +plot_ly(x = filtered_summarized_graph_size_df$Qc, + y = filtered_summarized_graph_size_df$Qr, + z = filtered_summarized_graph_size_df$M, + type = "scatter3d", mode = "markers", + color = filtered_summarized_graph_size_df$collection_id) %>% + layout(scene = list(xaxis = list(title = "Q_c"), + yaxis = list(title = "Q_r"))) ``` -```{r collection_size_boxplot} -filtered_graph_size <- graph_size_df %>% filter(M > 5) +### Analyse des tailles de réseaux + +```{r size_plot} +#| layout-ncol: 2 +ggplot(filtered_summarized_graph_size_df) + + aes(x = nc_mean, y = nr_mean, color = collection_id) + + geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) + geom_ellipse(aes( + x0 = nc_mean, y0 = nr_mean, + a = 0.5 * nc_sd, + b = 0.5 * nr_sd, + fill = collection_id, angle = 0 + ), alpha = 0.1) + + geom_text_repel(aes(label = collection_id)) + + xlab(TeX("$n_c$")) + + ylab(TeX("$n_r$")) + + coord_fixed() + + ggtitle("Distribution des tailles moyennes et écart-types") + +max_nr <- 200 +max_nc <- 200 + +ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) + + aes(x = nc_mean, y = nr_mean, color = collection_id) + + geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) + geom_ellipse(aes( + x0 = nc_mean, y0 = nr_mean, + a = 0.5 * nc_sd, + b = 0.5 * nr_sd, + fill = collection_id, angle = 0 + ), alpha = 0.1) + + geom_text_repel(aes(label = collection_id)) + + xlab(TeX("$n_c$")) + + ylab(TeX("$n_r$")) + + coord_fixed() + + ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$"))) + +filtered_graph_size <- graph_size_df %>% filter(M > 1) ggplot(filtered_graph_size) + aes(x = collection_id, y = nr, fill = collection_id) + geom_boxplot() ggplot(filtered_graph_size) + aes(x = collection_id, y = nc, fill = collection_id) + geom_boxplot() - -# ggplot(filtered_graph_size) + -# aes(x = nc, y = nr, fill = collection_id) + -gg_bagplot(filtered_graph_size, x = filtered_graph_size[["nc"]], y = filtered_graph_size[["nr"]]) -``` - -```{r collection_blocks_plot} -filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5) -ggplot(filtered_summarized_graph_size_df) + - aes(x = Qc, y = Qr, color = collection_id) + - geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) - geom_circle(aes(x0 = Qc, y0 = Qr, r = 0.025 * M, fill = collection_id), alpha = 0.1) + - coord_fixed() - -``` - -```{r collection_size_mean} -filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5) -ggplot(filtered_summarized_graph_size_df) + - aes(x = nc_mean, y = nr_mean, color = collection_id) + - geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05) - geom_ellipse(aes(x0 = nc_mean, y0 = nr_mean, a = 0.5 * nc_sd, b = 0.5 * nr_sd, fill = collection_id, angle = 0), alpha = 0.1) + - coord_fixed() - ``` diff --git a/code/applications/utils.R b/code/applications/utils.R index 601c773..61be415 100644 --- a/code/applications/utils.R +++ b/code/applications/utils.R @@ -8,8 +8,12 @@ #' #' @return A vector of size `n` with the file path. get_recent_files <- function(data_folder, n = 4) { - files_info <- file.info(file.path(data_folder, list.files(data_folder))) - files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder)) + files_info <- file.info(file.path(data_folder, list.files(data_folder, + include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed" + ))) + files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder, + include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed" + )) files_info <- sort_by(files_info, files_info[["ctime"]], decreasing = TRUE) if (n > nrow(files_info)) { warning( @@ -23,17 +27,10 @@ get_recent_files <- function(data_folder, n = 4) { #' Identify models identify_models <- function(files_vec) { - iid_id <- grep(pattern = "iid", files_vec, fixed = TRUE) - pi_id <- grep(pattern = "pi_", files_vec, fixed = TRUE) - rho_id <- grep(pattern = "_rho", files_vec, fixed = TRUE) - pirho_id <- grep(pattern = "pirho", files_vec, fixed = TRUE) - file_order <- c(iid_id, pi_id, rho_id, pirho_id) - stopifnot( - "There are multiple files matching models !" = - length(file_order) <= 4L + names(files_vec) <- stringr::str_extract( + string = files_vec, + pattern = "(iid|pirho|pi|rho)" ) - files_vec <- files_vec[file_order] - names(files_vec) <- c("iid", "pi", "rho", "pirho") return(files_vec) } @@ -50,4 +47,4 @@ build_graph_size_dataframe <- function(collection_list) { Qc = collection[["Q"]][[2]] ) })) -} \ No newline at end of file +} diff --git a/code/applications/weboflife/02_weboflife_clustering.R b/code/applications/weboflife/02_weboflife_clustering.R index 475ab05..f5f66ac 100644 --- a/code/applications/weboflife/02_weboflife_clustering.R +++ b/code/applications/weboflife/02_weboflife_clustering.R @@ -37,7 +37,7 @@ if (length(arg) == 0L) { } } -#  Arguments checks +# Arguments checks allowed_model <- c("iid", "pi", "rho", "pirho") stopifnot( "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model), diff --git a/code/data/simulations/clustering/synthetic/synthetic_iid_seed_1234.Rds b/code/data/simulations/clustering/synthetic/synthetic_iid_seed_1234.Rds new file mode 100644 index 0000000..d8cd66b Binary files /dev/null and b/code/data/simulations/clustering/synthetic/synthetic_iid_seed_1234.Rds differ diff --git a/code/results/applications/dore/dore_collection_iid_28-05-24_15-39-23.Rds b/code/results/errored/new_dore_collection_iid_28-05-24_15-39-23.Rds similarity index 100% rename from code/results/applications/dore/dore_collection_iid_28-05-24_15-39-23.Rds rename to code/results/errored/new_dore_collection_iid_28-05-24_15-39-23.Rds diff --git a/code/results/applications/dore/dore_collection_pi_28-05-24_19-04-47.Rds b/code/results/errored/new_dore_collection_pi_28-05-24_19-04-47.Rds similarity index 100% rename from code/results/applications/dore/dore_collection_pi_28-05-24_19-04-47.Rds rename to code/results/errored/new_dore_collection_pi_28-05-24_19-04-47.Rds diff --git a/code/results/applications/dore/dore_collection_pirho_29-05-24_00-02-48.Rds b/code/results/errored/new_dore_collection_pirho_29-05-24_00-02-48.Rds similarity index 100% rename from code/results/applications/dore/dore_collection_pirho_29-05-24_00-02-48.Rds rename to code/results/errored/new_dore_collection_pirho_29-05-24_00-02-48.Rds diff --git a/code/results/applications/dore/dore_collection_rho_28-05-24_21-47-48.Rds b/code/results/errored/new_dore_collection_rho_28-05-24_21-47-48.Rds similarity index 100% rename from code/results/applications/dore/dore_collection_rho_28-05-24_21-47-48.Rds rename to code/results/errored/new_dore_collection_rho_28-05-24_21-47-48.Rds diff --git a/code/scripts/migale_application_herbivores.sh b/code/scripts/migale_application_herbivores.sh index 73594c5..a474f66 100755 --- a/code/scripts/migale_application_herbivores.sh +++ b/code/scripts/migale_application_herbivores.sh @@ -10,7 +10,7 @@ #$ -o logs/$JOB_NAME.$TASK_ID #$ -e logs/$JOB_NAME.$TASK_ID -# Creating log directory if it doesn't exists +# Creating log directory if it doesn't exists BASE_DIR="/home/$USER/work/mia-stage-2024" LOG_DIR=$(echo "$BASE_DIR/logs") @@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then mkdir -p $LOG_DIR fi -# Constant data +# Constant data MODELARRAY=("iid" "pi" "rho" "pirho") MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]} -# Finding directory +# Finding directory APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications") echo $APPLICATIONS_DIR diff --git a/code/scripts/migale_application_weboflife.sh b/code/scripts/migale_application_weboflife.sh index 026b3fb..517ded1 100755 --- a/code/scripts/migale_application_weboflife.sh +++ b/code/scripts/migale_application_weboflife.sh @@ -10,7 +10,7 @@ #$ -o logs/$JOB_NAME.$TASK_ID #$ -e logs/$JOB_NAME.$TASK_ID -# Creating log directory if it doesn't exists +# Creating log directory if it doesn't exists BASE_DIR="/home/$USER/work/mia-stage-2024" LOG_DIR=$(echo "$BASE_DIR/logs") @@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then mkdir -p $LOG_DIR fi -# Constant data +# Constant data MODELARRAY=("iid" "pi" "rho" "pirho") MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]} -# Finding directory +# Finding directory APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications") echo $APPLICATIONS_DIR diff --git a/code/scripts/migale_simulations_NA_robustness.sh b/code/scripts/migale_simulations_NA_robustness.sh index 1b3a331..359383b 100755 --- a/code/scripts/migale_simulations_NA_robustness.sh +++ b/code/scripts/migale_simulations_NA_robustness.sh @@ -10,9 +10,9 @@ #$ -o logs/$JOB_NAME.$TASK_ID #$ -e logs/$JOB_NAME.$TASK_ID -# Creating log directory if it doesn't exists +# Creating log directory if it doesn't exists -# Constant data +# Constant data STRUCTA=("nested" "modular") SAMPLINGA=("uniform" "row" "col" "rowcol") @@ -25,11 +25,11 @@ if [ ! -d "$LOG_DIR" ]; then mkdir -p $LOG_DIR fi -# Finding simulations directory +# Finding simulations directory SIMULATIONS_DIR=$(echo "$BASE_DIR/code/simulations") echo $SIMULATIONS_DIR -# Parsing sge array id +# Parsing sge array id Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct $STRUCT diff --git a/code/simulations/simulations_NA_robustness.R b/code/simulations/simulations_NA_robustness.R index ef589cb..b436908 100644 --- a/code/simulations/simulations_NA_robustness.R +++ b/code/simulations/simulations_NA_robustness.R @@ -28,7 +28,7 @@ if (length(arg) == 0L) { } } -#  Arguments checks +# Arguments checks allowed_struct <- c("modular", "nested") stopifnot( @@ -70,49 +70,61 @@ max_repetition <- 10L # Collections collections <- list( - "iid" = c(generate_bipartite_collection(nr1, nc1, - pir, pic, - alpha, 1, - model = "iid", - return_memberships = TRUE), + "iid" = c( + generate_bipartite_collection(nr1, nc1, + pir, pic, + alpha, 1, + model = "iid", + return_memberships = TRUE + ), generate_bipartite_collection(nr2, nc2, - pir, pic, - alpha, M-1, - model = "iid", - return_memberships = TRUE) + pir, pic, + alpha, M - 1, + model = "iid", + return_memberships = TRUE + ) ), - "pi" = c(generate_bipartite_collection(nr1, nc1, - pir, pic, - alpha, 1, - model = "pi", - return_memberships = TRUE), + "pi" = c( + generate_bipartite_collection(nr1, nc1, + pir, pic, + alpha, 1, + model = "pi", + return_memberships = TRUE + ), generate_bipartite_collection(nr2, nc2, - pir, pic, - alpha, M-1, - model = "pi", - return_memberships = TRUE) + pir, pic, + alpha, M - 1, + model = "pi", + return_memberships = TRUE + ) ), - "rho" = c(generate_bipartite_collection(nr1, nc1, - pir, pic, - alpha, 1, - model = "rho", - return_memberships = TRUE), + "rho" = c( + generate_bipartite_collection(nr1, nc1, + pir, pic, + alpha, 1, + model = "rho", + return_memberships = TRUE + ), generate_bipartite_collection(nr2, nc2, - pir, pic, - alpha, M-1, - model = "rho", - return_memberships = TRUE) + pir, pic, + alpha, M - 1, + model = "rho", + return_memberships = TRUE + ) ), - "pirho" = c(generate_bipartite_collection(nr1, nc1, - pir, pic, - alpha, 1, - model = "pirho", - return_memberships = TRUE), + "pirho" = c( + generate_bipartite_collection(nr1, nc1, + pir, pic, + alpha, 1, + model = "pirho", + return_memberships = TRUE + ), generate_bipartite_collection(nr2, nc2, - pir, pic, - alpha, M-1, - model = "pirho", - return_memberships = TRUE) + pir, pic, + alpha, M - 1, + model = "pirho", + return_memberships = TRUE + ) ) ) @@ -124,7 +136,7 @@ conditions <- expand.grid( ) -#  Data params +# Data params main_dir <- file.path("code", "results", "simulations", "NA_robustness") if (!dir.exists(main_dir)) { @@ -142,7 +154,7 @@ if (!dir.exists(temp_dir)) { } file_save <- file.path(main_dir, paste0( - "NA_robustness_", start_time, "_", sampling, + "NA_robustness_", start_time, "_", sampling, "_", struct, "_1-", nrow(conditions), ".Rds" )) @@ -179,7 +191,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) { NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - #  Computes the index as a single number, R stores matrices by column + # Computes the index as a single number, R stores matrices by column (NAs_selected_index_exp[["col"]] - 1) * nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] }, @@ -190,7 +202,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) { NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - #  Computes the index as a single number, R stores matrices by column + # Computes the index as a single number, R stores matrices by column (NAs_selected_index_exp[["col"]] - 1) * nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] }, @@ -201,7 +213,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) { col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected) NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected) - #  Computes the index as a single number, R stores matrices by column + # Computes the index as a single number, R stores matrices by column (NAs_selected_index_exp[["col"]] - 1) * nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]] } @@ -269,14 +281,13 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) { mybisbmpop[["best_fit"]][["Z"]][[1]][[2]] ), elapsed_secs = difftime(stop_time, start_time, units = "sec"), - sampling = sampling, struct = struct ) message("Finished step ", current, "/", nrow(conditions), "\n") - #  Saving temp + # Saving temp temp_file_save <- file.path(temp_dir, paste0( "conditions_", current, "_on_", nrow(conditions), ".Rds" @@ -296,4 +307,4 @@ saveRDS( result_dataframe, file = file_save ) -message("Finished simulations.") \ No newline at end of file +message("Finished simulations.") diff --git a/code/simulations/simulations_inference_bernoulli.R b/code/simulations/simulations_inference_bernoulli.R index f75cb1d..56ebb93 100644 --- a/code/simulations/simulations_inference_bernoulli.R +++ b/code/simulations/simulations_inference_bernoulli.R @@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2]) conditions <- conditions[choosed_conditions, ] -#  Data params +# Data params main_dir <- file.path("code", "results", "simulations", "inference", "bernoulli") if (!dir.exists(main_dir)) { @@ -205,7 +205,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { stop_time_condition <- Sys.time() - ##  Preparing date for export + ## Preparing date for export # BICLs sep_BICL <- sum(sep_BiSBM$BICL) iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL @@ -318,7 +318,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { ) message("Finished step ", s, "/", nrow(conditions)) - #  Saving temp + # Saving temp temp_file_save <- file.path(temp_dir, paste0( "conditions_", s, "_on_", nrow(conditions), ".Rds" @@ -326,7 +326,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { saveRDS(object = data_frame_output, file = temp_file_save) - #  Saving inhabitual data + # Saving inhabitual data if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) { warning("Incorrect result encountered, saving.") incorrect_filepath <- file.path(temp_dir, paste0( @@ -358,4 +358,4 @@ full_data_frame <- do.call(rbind, results) saveRDS(full_data_frame, file = file_save ) -message("Finished simulations.") \ No newline at end of file +message("Finished simulations.") diff --git a/code/simulations/simulations_inference_poisson.R b/code/simulations/simulations_inference_poisson.R index 8033e3a..6023695 100644 --- a/code/simulations/simulations_inference_poisson.R +++ b/code/simulations/simulations_inference_poisson.R @@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2]) conditions <- conditions[choosed_conditions, ] -#  Data params +# Data params main_dir <- file.path("code", "results", "simulations", "inference", "poisson") if (!dir.exists(main_dir)) { @@ -204,7 +204,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { stop_time_condition <- Sys.time() - ##  Preparing date for export + ## Preparing date for export # BICLs sep_BICL <- sum(sep_BiSBM$BICL) iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL @@ -317,7 +317,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { ) message("Finished step ", s, "/", nrow(conditions)) - #  Saving temp + # Saving temp temp_file_save <- file.path(temp_dir, paste0( "conditions_", s, "_on_", nrow(conditions), ".Rds" @@ -325,7 +325,7 @@ results <- parallel::mclapply(conditions_rows, function(s) { saveRDS(object = data_frame_output, file = temp_file_save) - #  Saving inhabitual data + # Saving inhabitual data if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) { warning("Incorrect result encountered, saving.") incorrect_filepath <- file.path(temp_dir, paste0( @@ -357,4 +357,4 @@ full_data_frame <- do.call(rbind, results) saveRDS(full_data_frame, file = file_save ) -message("Finished simulations.") \ No newline at end of file +message("Finished simulations.") diff --git a/code/simulations/simulations_model_selection.R b/code/simulations/simulations_model_selection.R index 908f52b..08f16a9 100644 --- a/code/simulations/simulations_model_selection.R +++ b/code/simulations/simulations_model_selection.R @@ -72,7 +72,7 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) { choosed_conditions <- seq.int(from = arg[1], to = arg[2]) conditions <- conditions[choosed_conditions, ] -#  Data params +# Data params main_dir <- file.path("code", "results", "simulations", "model_selection") if (!dir.exists(main_dir)) { @@ -246,7 +246,7 @@ with_progress({ ) message("Finished step ", s, "/", nrow(conditions)) - #  Saving temp + # Saving temp temp_file_save <- file.path(temp_dir, paste0( "conditions_", s, "_on_", nrow(conditions), ".Rds" @@ -264,4 +264,4 @@ full_data_frame <- do.call(rbind, results) saveRDS(full_data_frame, file = file_save ) -message("Finished simulations.") \ No newline at end of file +message("Finished simulations.")