Removing non breaking spaces
This commit is contained in:
parent
a9b6ae30d4
commit
b43618b378
27 changed files with 2124 additions and 234 deletions
|
|
@ -1,4 +1,4 @@
|
|||
# MIA Stage 2024
|
||||
# MIA Stage 2024
|
||||
|
||||
Ce dépôt accueille tout le travaille que je vais réaliser pendant mon stage au
|
||||
MIA Paris-Saclay en 2024.
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ output:
|
|||
---
|
||||
|
||||
```{r knitropts, echo = FALSE}
|
||||
knitr::opts_chunk$set(fig.width=12L, fig.height=10L)
|
||||
knitr::opts_chunk$set(fig.width = 12L, fig.height = 10L)
|
||||
```
|
||||
|
||||
```{r imports, echo = FALSE, include=FALSE}
|
||||
|
|
@ -19,7 +19,7 @@ loadNamespace(package = "patchwork")
|
|||
```
|
||||
|
||||
```{r data, echo = FALSE}
|
||||
# Loading data
|
||||
# Loading data
|
||||
data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness")
|
||||
files_list <- list.files(data_folder, pattern = "^NA_robustness_22-04-2024_17")
|
||||
```
|
||||
|
|
@ -31,13 +31,13 @@ prepare_dataframes_auc_ari <- function(data) {
|
|||
sampling <- unique(data[["sampling"]])
|
||||
struct <- unique(data[["struct"]])
|
||||
|
||||
# Averaging over repetitions
|
||||
# Averaging over repetitions
|
||||
averaged_data <- data %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
select(-c(repetition, sampling, struct)) %>%
|
||||
summarise_all(list(mean = mean, sd = sd))
|
||||
|
||||
# Preparing auc_data
|
||||
# Preparing auc_data
|
||||
auc_data <- averaged_data %>%
|
||||
select(c(prop_NAs, model) | contains("auc_")) %>%
|
||||
rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ loadNamespace("kableExtra")
|
|||
```
|
||||
|
||||
```{r data, echo = FALSE}
|
||||
# Loading data
|
||||
# Loading data
|
||||
data_folder <- file.path(here(), "code", "results", "simulations", "inference", "bernoulli")
|
||||
# files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024")
|
||||
data <- readRDS(file.path(data_folder, "bernoulli_inference_06-05-2024_17-21-48_1-972.Rds"))
|
||||
|
|
@ -62,7 +62,7 @@ print_data <- av_data %>%
|
|||
group_by(epsilon_alpha) %>%
|
||||
round(2) %>%
|
||||
mutate(
|
||||
# Model comparison
|
||||
# Model comparison
|
||||
pirho_over_sep = pirho_over_sep_mean,
|
||||
pirho_over_iid = pirho_over_iid_mean,
|
||||
pirho_over_pi = pirho_over_pi_mean,
|
||||
|
|
@ -77,7 +77,7 @@ print_data <- av_data %>%
|
|||
pirho_Q2_over_4 = pirho_Q2_over_4_mean,
|
||||
|
||||
|
||||
# Grouping accuracy
|
||||
# Grouping accuracy
|
||||
pirho_mean_row_ARI = paste(pirho_mean_row_ARI_mean,
|
||||
pirho_mean_row_ARI_sd,
|
||||
sep = "\\pm"
|
||||
|
|
@ -132,7 +132,7 @@ ft_1 <- flextable(print_data) |>
|
|||
opts_html = list(
|
||||
scroll = list(freeze_first_column = TRUE)
|
||||
)
|
||||
) |>
|
||||
) |>
|
||||
compose(j = "pirho_mean_row_ARI", value = as_paragraph(as_equation(pirho_mean_row_ARI)))
|
||||
ft_1 <- compose(ft_1, j = "pirho_mean_col_ARI", value = as_paragraph(as_equation(pirho_mean_col_ARI)))
|
||||
ft_1 <- compose(ft_1, j = "pirho_double_row_ARI", value = as_paragraph(as_equation(pirho_double_row_ARI)))
|
||||
|
|
@ -143,7 +143,7 @@ ft_1 <- ft_1 |> mk_par(
|
|||
as_equation(col_keys, width = .1, height = .2)
|
||||
)
|
||||
)
|
||||
theme_zebra(ft_1) |>
|
||||
theme_zebra(ft_1) |>
|
||||
hline(i = 1, border = fp_border_default(), part = "header") |>
|
||||
vline(j = 1, border = fp_border_default(), part = "all") |>
|
||||
bg(j = 1, bg = "#DDDDDD", part = "all") |>
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@ set.seed(1234, "L'Ecuyer-CMRG")
|
|||
data(dorebipartite)
|
||||
str(dorebipartite)
|
||||
|
||||
nb_cores = parallelly::availableCores()
|
||||
concurrent_models <- min(length(seq(1,4)), nb_cores%/%2)
|
||||
per_model <- (nb_cores - concurrent_models) %/% length(seq(1,4))
|
||||
nb_cores <- parallelly::availableCores()
|
||||
concurrent_models <- min(length(seq(1, 4)), nb_cores %/% 2)
|
||||
per_model <- (nb_cores - concurrent_models) %/% length(seq(1, 4))
|
||||
|
||||
mb1 <- microbenchmark(
|
||||
"fancy computation" = {
|
||||
|
|
@ -44,4 +44,4 @@ mb2 <- microbenchmark(
|
|||
times = 5L
|
||||
)
|
||||
|
||||
# Bilan : plus intéressant de bourriner
|
||||
# Bilan : plus intéressant de bourriner
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ file_list <- list.files(data_folder)
|
|||
file <- file_list[[1]]
|
||||
data <- readRDS(file.path(data_folder, file))
|
||||
|
||||
# Extracting data to netlist and Z
|
||||
# Extracting data to netlist and Z
|
||||
netlist_memb <- data[["netlist"]]
|
||||
netlist <- lapply(netlist_memb, function(full) full[["incidence_matrix"]])
|
||||
nr <- nrow(netlist[[1]])
|
||||
|
|
@ -58,8 +58,6 @@ joined_col_memberships <- unlist(col_blockmemberships)
|
|||
|
||||
row_taus <- lapply(row_blockmemberships, function(Z1_m) colSBM:::.one_hot(Z1_m, 4))
|
||||
col_taus <- lapply(col_blockmemberships, function(Z2_m) colSBM:::.one_hot(Z2_m, 4))
|
||||
|
||||
|
||||
```
|
||||
|
||||
```{r parameters, echo = FALSE}
|
||||
|
|
@ -110,7 +108,7 @@ ARI(joined_col_memberships, good_joined_col)
|
|||
|
||||
```{r, echo = FALSE}
|
||||
wrap_plots(
|
||||
plot_alpha(data[["alpha"]])+ labs(caption = "Vrai alpha"),
|
||||
plot_alpha(data[["alpha"]]) + labs(caption = "Vrai alpha"),
|
||||
plot(good_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,4)"),
|
||||
plot(bad_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,5)")
|
||||
)
|
||||
|
|
@ -143,18 +141,20 @@ Ici on clone le modèle et on lui donne les bons paramètres pour voir s'il fait
|
|||
mieux que celui trouvé avant.
|
||||
|
||||
```{r testing_true_params, echo = FALSE}
|
||||
# Un clone
|
||||
# Un clone
|
||||
good_clone <- good_model$clone()
|
||||
|
||||
# Les vrais paramètres
|
||||
# Les vrais paramètres
|
||||
alpha <- data[["alpha"]]
|
||||
taus <- lapply(seq_along(row_taus), function(m) {
|
||||
list(row = row_taus[[m]],
|
||||
col = col_taus[[m]])
|
||||
list(
|
||||
row = row_taus[[m]],
|
||||
col = col_taus[[m]]
|
||||
)
|
||||
})
|
||||
# Pi params
|
||||
pi1 <- as.vector(data[["pi1"]])
|
||||
pi1[1] <- pi1[1] + 1e-9
|
||||
pi1[1] <- pi1[1] + 1e-9
|
||||
rho1 <- rep(0.25, 4)
|
||||
pi2 <- rep(0.25, 4)
|
||||
rho2 <- as.vector(data[["rho2"]])
|
||||
|
|
@ -165,15 +165,17 @@ good_clone[["tau"]] <- taus
|
|||
good_clone[["pi"]] <- pi
|
||||
good_clone[["alpha"]] <- alpha
|
||||
|
||||
result_BICL <- c(bad_BICL = bad_model$compute_BICL(),
|
||||
good_model_BICL = good_model$compute_BICL(),
|
||||
good_clone_BICL = good_clone$compute_BICL())
|
||||
result_BICL <- c(
|
||||
bad_BICL = bad_model$compute_BICL(),
|
||||
good_model_BICL = good_model$compute_BICL(),
|
||||
good_clone_BICL = good_clone$compute_BICL()
|
||||
)
|
||||
print(result_BICL)
|
||||
```
|
||||
|
||||
On vient mettre à la place du 4,4 le modèle avec les vrais paramètres.
|
||||
Et on espère voir la bonne information se diffuser.
|
||||
```{r adjusting, echo = FALSE}
|
||||
fitted_bisbmpop_pirho$model_list[[4,4]] <- good_clone
|
||||
fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4,4), depth = 1L, nb_pass = 1L)
|
||||
fitted_bisbmpop_pirho$model_list[[4, 4]] <- good_clone
|
||||
fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4, 4), depth = 1L, nb_pass = 1L)
|
||||
```
|
||||
|
|
@ -2,7 +2,7 @@ library(here)
|
|||
|
||||
inv_folder <- file.path(here(), "code", "results", "investigating", "split_clust")
|
||||
|
||||
for(file in list.files(inv_folder)) {
|
||||
for (file in list.files(inv_folder)) {
|
||||
load(file.path(inv_folder, file))
|
||||
}
|
||||
|
||||
|
|
@ -16,10 +16,9 @@ noname_X <- as.matrix(noname_X)
|
|||
|
||||
colSBM:::split_clust(noname_X, Z, Q = 1, is_bipartite = TRUE)
|
||||
|
||||
alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1
|
||||
)[[1]]
|
||||
alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1)[[1]]
|
||||
colSBM:::split_clust(alt_X, Z, Q = 1, is_bipartite = TRUE)
|
||||
|
||||
|
||||
# La solution était que X était regardé comme une table et unique marche
|
||||
# différemment
|
||||
# La solution était que X était regardé comme une table et unique marche
|
||||
# différemment
|
||||
|
|
|
|||
133
code/applications/base_analysis.qmd
Normal file
133
code/applications/base_analysis.qmd
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
### Modèle ${{model}}$
|
||||
|
||||
```{r}
|
||||
library(here)
|
||||
|
||||
library(colSBM)
|
||||
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
library(plotly)
|
||||
library(ggplot2)
|
||||
library(ggforce)
|
||||
library(ggrepel)
|
||||
library(latex2exp)
|
||||
```
|
||||
```{r}
|
||||
list_collection <- readRDS("{{clustering}}")
|
||||
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
|
||||
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
|
||||
summarized_graph_size_df <- graph_size_df %>%
|
||||
group_by(collection_id) %>%
|
||||
select(-net_id) %>%
|
||||
summarise(
|
||||
M = n(),
|
||||
nr_mean = mean(nr),
|
||||
nr_sd = ifelse(length(nr) > 1, sd(nr), 0),
|
||||
nc_mean = mean(nc),
|
||||
nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
|
||||
Qr = first(Qr),
|
||||
Qc = first(Qc)
|
||||
)
|
||||
```
|
||||
|
||||
:::{.panel-tabset}
|
||||
#### Analyse des tailles de cluster
|
||||
|
||||
```{r}
|
||||
filtered_summarized_graph_size_df <- summarized_graph_size_df
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = Qc, y = Qr, color = collection_id, label = M) +
|
||||
geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_text_repel() +
|
||||
xlab(TeX("$Q_c$")) +
|
||||
ylab(TeX("$Q_r$")) +
|
||||
coord_fixed()
|
||||
|
||||
plot_ly(x = filtered_summarized_graph_size_df$Qc,
|
||||
y = filtered_summarized_graph_size_df$Qr,
|
||||
z = filtered_summarized_graph_size_df$M,
|
||||
type = "scatter3d", mode = "markers",
|
||||
color = filtered_summarized_graph_size_df$collection_id) %>%
|
||||
layout(scene = list(xaxis = list(title = "Q_c"),
|
||||
yaxis = list(title = "Q_r")))
|
||||
```
|
||||
|
||||
#### Analyse des tailles de réseaux
|
||||
|
||||
```{r}
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle("Distribution des tailles moyennes et écart-types")
|
||||
|
||||
max_nr <- 200
|
||||
max_nc <- 200
|
||||
|
||||
ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
|
||||
|
||||
filtered_graph_size <- graph_size_df
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nr, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nc, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nr + nc, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nr * nc, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
#### Structure des collections
|
||||
|
||||
```{r}
|
||||
#| output: asis
|
||||
for (i in seq_len(length(unlisted_best_partition))) {
|
||||
cat("\n##### Structure collection ", i, "\n")
|
||||
current_col <- unlisted_best_partition[[i]]$clone()
|
||||
current_col$net_id <- current_col$net_id %>%
|
||||
stringr::str_trunc(width = 20L)
|
||||
try({
|
||||
p <- plot(
|
||||
current_col,
|
||||
type = "meso",
|
||||
values = FALSE, mixture = TRUE
|
||||
) +
|
||||
ggtitle(paste0("Structure\ncollection ", i))
|
||||
print(p)
|
||||
})
|
||||
}
|
||||
|
||||
```
|
||||
|
|
@ -24,7 +24,7 @@ if (length(arg) == 0L) {
|
|||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
# Arguments checks
|
||||
allowed_model <- c("iid", "pi", "rho", "pirho")
|
||||
stopifnot(
|
||||
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
|
||||
|
|
|
|||
1731
code/applications/dore/02_dore_analysis.html
Normal file
1731
code/applications/dore/02_dore_analysis.html
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,52 +1,46 @@
|
|||
---
|
||||
format: html
|
||||
title: Clustering avec `colSBM` des données
|
||||
execute:
|
||||
echo: false
|
||||
warning: false
|
||||
---
|
||||
|
||||
# Analyse
|
||||
|
||||
```{r}
|
||||
library(colSBM)
|
||||
library(here)
|
||||
library(stringr)
|
||||
```
|
||||
|
||||
```{r}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "dore")
|
||||
files_vec <- get_recent_files(data_folder)
|
||||
files_vec <- get_recent_files(data_folder, n = 16)
|
||||
files_vec <- identify_models(files_vec)
|
||||
list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
|
||||
best_partition <- extract_best_bipartite_partition(list_collection)
|
||||
length(unlist(best_partition))
|
||||
unlisted_bp <- unlist(best_partition)
|
||||
list_clustering <- files_vec
|
||||
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(stringr)
|
||||
## Analyse par modèle
|
||||
|
||||
# string_split <- function(string,size) {
|
||||
# unlist(str_extract_all(string, paste0('.{1,',size,'}')))
|
||||
# }
|
||||
:::{.panel-tabset}
|
||||
|
||||
# custom_str_wrap <- function(str, width = 30) {
|
||||
# if (nchar(str) > width) {
|
||||
# return(paste(string_split(str, size = width), collapse = "\n"))
|
||||
# }
|
||||
# return(str)
|
||||
# }
|
||||
```{r write_tabs}
|
||||
#| warning: false
|
||||
#| output: asis
|
||||
|
||||
iid_BICL <- sum(sapply(unlisted_bp, function(model) model$BICL))
|
||||
|
||||
# Shortening net names
|
||||
for (idx in seq_len(length(unlisted_bp))) {
|
||||
unlisted_bp[[idx]]$net_id <- sapply(unlisted_bp[[idx]]$net_id, function(id) str_trunc(id,20))
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
for (idx in seq_len(length(unlisted_bp))) {
|
||||
print(plot(unlisted_bp[[idx]], type = "meso", mixture = TRUE, values = TRUE))
|
||||
# Generate content for each model using knit_expand
|
||||
for (clustering_idx in seq_len(length(list_clustering))) {
|
||||
clustering <- list_clustering[clustering_idx]
|
||||
model <- names(clustering)
|
||||
expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
|
||||
res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
|
||||
cat(res, sep = "\n")
|
||||
cat("\n")
|
||||
}
|
||||
```
|
||||
|
||||
Analyser par LBM :
|
||||
3,
|
||||
:::
|
||||
|
|
@ -28,7 +28,7 @@ if (length(arg) == 0L) {
|
|||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
# Arguments checks
|
||||
allowed_model <- c("iid", "pi", "rho", "pirho")
|
||||
stopifnot(
|
||||
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model)
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -3,8 +3,18 @@ title: Analysis of herbivores data
|
|||
execute:
|
||||
echo: false
|
||||
freeze: auto
|
||||
warning: false
|
||||
---
|
||||
|
||||
```{r import_data}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
|
||||
files_vec <- get_recent_files(data_folder)
|
||||
filepath <- files_vec[1]
|
||||
list_collection <- readRDS(filepath)
|
||||
```
|
||||
|
||||
```{r libs_and_data}
|
||||
library(here)
|
||||
|
||||
|
|
@ -12,21 +22,14 @@ library(colSBM)
|
|||
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
library(plotly)
|
||||
library(ggplot2)
|
||||
library(ggforce)
|
||||
library(weird)
|
||||
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
|
||||
files_vec <- get_recent_files(data_folder)
|
||||
# files_vec <- identify_models(files_vec)
|
||||
library(ggrepel)
|
||||
library(latex2exp)
|
||||
```
|
||||
|
||||
```{r}
|
||||
filepath <- files_vec[1]
|
||||
list_collection <- readRDS(filepath)
|
||||
|
||||
```{r dataformatting}
|
||||
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
|
||||
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
|
||||
|
|
@ -41,39 +44,72 @@ summarized_graph_size_df <- graph_size_df %>%
|
|||
nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
|
||||
Qr = first(Qr),
|
||||
Qc = first(Qc)
|
||||
)
|
||||
)
|
||||
```
|
||||
### Analyse des tailles de cluster
|
||||
```{r collection_blocks_plot_size}
|
||||
#| layout-ncol: 2
|
||||
#| plotly: true
|
||||
filtered_summarized_graph_size_df <- summarized_graph_size_df
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = Qc, y = Qr, color = collection_id, label = M) +
|
||||
geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_text_repel() +
|
||||
xlab(TeX("$Q_c$")) +
|
||||
ylab(TeX("$Q_r$")) +
|
||||
coord_fixed()
|
||||
|
||||
plot_ly(x = filtered_summarized_graph_size_df$Qc,
|
||||
y = filtered_summarized_graph_size_df$Qr,
|
||||
z = filtered_summarized_graph_size_df$M,
|
||||
type = "scatter3d", mode = "markers",
|
||||
color = filtered_summarized_graph_size_df$collection_id) %>%
|
||||
layout(scene = list(xaxis = list(title = "Q_c"),
|
||||
yaxis = list(title = "Q_r")))
|
||||
```
|
||||
|
||||
```{r collection_size_boxplot}
|
||||
filtered_graph_size <- graph_size_df %>% filter(M > 5)
|
||||
### Analyse des tailles de réseaux
|
||||
|
||||
```{r size_plot}
|
||||
#| layout-ncol: 2
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle("Distribution des tailles moyennes et écart-types")
|
||||
|
||||
max_nr <- 200
|
||||
max_nc <- 200
|
||||
|
||||
ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
|
||||
|
||||
filtered_graph_size <- graph_size_df %>% filter(M > 1)
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nr, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nc, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
|
||||
# ggplot(filtered_graph_size) +
|
||||
# aes(x = nc, y = nr, fill = collection_id) +
|
||||
gg_bagplot(filtered_graph_size, x = filtered_graph_size[["nc"]], y = filtered_graph_size[["nr"]])
|
||||
```
|
||||
|
||||
```{r collection_blocks_plot}
|
||||
filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = Qc, y = Qr, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_circle(aes(x0 = Qc, y0 = Qr, r = 0.025 * M, fill = collection_id), alpha = 0.1) +
|
||||
coord_fixed()
|
||||
|
||||
```
|
||||
|
||||
```{r collection_size_mean}
|
||||
filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(x0 = nc_mean, y0 = nr_mean, a = 0.5 * nc_sd, b = 0.5 * nr_sd, fill = collection_id, angle = 0), alpha = 0.1) +
|
||||
coord_fixed()
|
||||
|
||||
```
|
||||
|
|
|
|||
|
|
@ -8,8 +8,12 @@
|
|||
#'
|
||||
#' @return A vector of size `n` with the file path.
|
||||
get_recent_files <- function(data_folder, n = 4) {
|
||||
files_info <- file.info(file.path(data_folder, list.files(data_folder)))
|
||||
files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder))
|
||||
files_info <- file.info(file.path(data_folder, list.files(data_folder,
|
||||
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
|
||||
)))
|
||||
files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder,
|
||||
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
|
||||
))
|
||||
files_info <- sort_by(files_info, files_info[["ctime"]], decreasing = TRUE)
|
||||
if (n > nrow(files_info)) {
|
||||
warning(
|
||||
|
|
@ -23,17 +27,10 @@ get_recent_files <- function(data_folder, n = 4) {
|
|||
|
||||
#' Identify models
|
||||
identify_models <- function(files_vec) {
|
||||
iid_id <- grep(pattern = "iid", files_vec, fixed = TRUE)
|
||||
pi_id <- grep(pattern = "pi_", files_vec, fixed = TRUE)
|
||||
rho_id <- grep(pattern = "_rho", files_vec, fixed = TRUE)
|
||||
pirho_id <- grep(pattern = "pirho", files_vec, fixed = TRUE)
|
||||
file_order <- c(iid_id, pi_id, rho_id, pirho_id)
|
||||
stopifnot(
|
||||
"There are multiple files matching models !" =
|
||||
length(file_order) <= 4L
|
||||
names(files_vec) <- stringr::str_extract(
|
||||
string = files_vec,
|
||||
pattern = "(iid|pirho|pi|rho)"
|
||||
)
|
||||
files_vec <- files_vec[file_order]
|
||||
names(files_vec) <- c("iid", "pi", "rho", "pirho")
|
||||
return(files_vec)
|
||||
}
|
||||
|
||||
|
|
@ -50,4 +47,4 @@ build_graph_size_dataframe <- function(collection_list) {
|
|||
Qc = collection[["Q"]][[2]]
|
||||
)
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ if (length(arg) == 0L) {
|
|||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
# Arguments checks
|
||||
allowed_model <- c("iid", "pi", "rho", "pirho")
|
||||
stopifnot(
|
||||
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -10,7 +10,7 @@
|
|||
#$ -o logs/$JOB_NAME.$TASK_ID
|
||||
#$ -e logs/$JOB_NAME.$TASK_ID
|
||||
|
||||
# Creating log directory if it doesn't exists
|
||||
# Creating log directory if it doesn't exists
|
||||
BASE_DIR="/home/$USER/work/mia-stage-2024"
|
||||
LOG_DIR=$(echo "$BASE_DIR/logs")
|
||||
|
||||
|
|
@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
|
|||
mkdir -p $LOG_DIR
|
||||
fi
|
||||
|
||||
# Constant data
|
||||
# Constant data
|
||||
MODELARRAY=("iid" "pi" "rho" "pirho")
|
||||
|
||||
MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}
|
||||
|
||||
# Finding directory
|
||||
# Finding directory
|
||||
APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")
|
||||
|
||||
echo $APPLICATIONS_DIR
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#$ -o logs/$JOB_NAME.$TASK_ID
|
||||
#$ -e logs/$JOB_NAME.$TASK_ID
|
||||
|
||||
# Creating log directory if it doesn't exists
|
||||
# Creating log directory if it doesn't exists
|
||||
BASE_DIR="/home/$USER/work/mia-stage-2024"
|
||||
LOG_DIR=$(echo "$BASE_DIR/logs")
|
||||
|
||||
|
|
@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
|
|||
mkdir -p $LOG_DIR
|
||||
fi
|
||||
|
||||
# Constant data
|
||||
# Constant data
|
||||
MODELARRAY=("iid" "pi" "rho" "pirho")
|
||||
|
||||
MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}
|
||||
|
||||
# Finding directory
|
||||
# Finding directory
|
||||
APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")
|
||||
|
||||
echo $APPLICATIONS_DIR
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@
|
|||
#$ -o logs/$JOB_NAME.$TASK_ID
|
||||
#$ -e logs/$JOB_NAME.$TASK_ID
|
||||
|
||||
# Creating log directory if it doesn't exists
|
||||
# Creating log directory if it doesn't exists
|
||||
|
||||
# Constant data
|
||||
# Constant data
|
||||
STRUCTA=("nested" "modular")
|
||||
SAMPLINGA=("uniform" "row" "col" "rowcol")
|
||||
|
||||
|
|
@ -25,11 +25,11 @@ if [ ! -d "$LOG_DIR" ]; then
|
|||
mkdir -p $LOG_DIR
|
||||
fi
|
||||
|
||||
# Finding simulations directory
|
||||
# Finding simulations directory
|
||||
SIMULATIONS_DIR=$(echo "$BASE_DIR/code/simulations")
|
||||
|
||||
echo $SIMULATIONS_DIR
|
||||
|
||||
# Parsing sge array id
|
||||
# Parsing sge array id
|
||||
|
||||
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct $STRUCT
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ if (length(arg) == 0L) {
|
|||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
# Arguments checks
|
||||
allowed_struct <- c("modular", "nested")
|
||||
|
||||
stopifnot(
|
||||
|
|
@ -70,49 +70,61 @@ max_repetition <- 10L
|
|||
|
||||
# Collections
|
||||
collections <- list(
|
||||
"iid" = c(generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE),
|
||||
"iid" = c(
|
||||
generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
generate_bipartite_collection(nr2, nc2,
|
||||
pir, pic,
|
||||
alpha, M-1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE)
|
||||
pir, pic,
|
||||
alpha, M - 1,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
),
|
||||
"pi" = c(generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "pi",
|
||||
return_memberships = TRUE),
|
||||
"pi" = c(
|
||||
generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "pi",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
generate_bipartite_collection(nr2, nc2,
|
||||
pir, pic,
|
||||
alpha, M-1,
|
||||
model = "pi",
|
||||
return_memberships = TRUE)
|
||||
pir, pic,
|
||||
alpha, M - 1,
|
||||
model = "pi",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
),
|
||||
"rho" = c(generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "rho",
|
||||
return_memberships = TRUE),
|
||||
"rho" = c(
|
||||
generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "rho",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
generate_bipartite_collection(nr2, nc2,
|
||||
pir, pic,
|
||||
alpha, M-1,
|
||||
model = "rho",
|
||||
return_memberships = TRUE)
|
||||
pir, pic,
|
||||
alpha, M - 1,
|
||||
model = "rho",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
),
|
||||
"pirho" = c(generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE),
|
||||
"pirho" = c(
|
||||
generate_bipartite_collection(nr1, nc1,
|
||||
pir, pic,
|
||||
alpha, 1,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
generate_bipartite_collection(nr2, nc2,
|
||||
pir, pic,
|
||||
alpha, M-1,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE)
|
||||
pir, pic,
|
||||
alpha, M - 1,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -124,7 +136,7 @@ conditions <- expand.grid(
|
|||
)
|
||||
|
||||
|
||||
# Data params
|
||||
# Data params
|
||||
main_dir <- file.path("code", "results", "simulations", "NA_robustness")
|
||||
|
||||
if (!dir.exists(main_dir)) {
|
||||
|
|
@ -142,7 +154,7 @@ if (!dir.exists(temp_dir)) {
|
|||
}
|
||||
|
||||
file_save <- file.path(main_dir, paste0(
|
||||
"NA_robustness_", start_time, "_", sampling,
|
||||
"NA_robustness_", start_time, "_", sampling,
|
||||
"_", struct, "_1-", nrow(conditions), ".Rds"
|
||||
))
|
||||
|
||||
|
|
@ -179,7 +191,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
|
|
@ -190,7 +202,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
},
|
||||
|
|
@ -201,7 +213,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
|
||||
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
|
||||
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
# Computes the index as a single number, R stores matrices by column
|
||||
(NAs_selected_index_exp[["col"]] - 1) *
|
||||
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
|
||||
}
|
||||
|
|
@ -269,14 +281,13 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
|
|||
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec"),
|
||||
|
||||
sampling = sampling,
|
||||
struct = struct
|
||||
)
|
||||
|
||||
message("Finished step ", current, "/", nrow(conditions), "\n")
|
||||
|
||||
# Saving temp
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", current, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
|
|
@ -296,4 +307,4 @@ saveRDS(
|
|||
result_dataframe,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
message("Finished simulations.")
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])
|
|||
|
||||
conditions <- conditions[choosed_conditions, ]
|
||||
|
||||
# Data params
|
||||
# Data params
|
||||
main_dir <- file.path("code", "results", "simulations", "inference", "bernoulli")
|
||||
|
||||
if (!dir.exists(main_dir)) {
|
||||
|
|
@ -205,7 +205,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
|
||||
stop_time_condition <- Sys.time()
|
||||
|
||||
## Preparing date for export
|
||||
## Preparing date for export
|
||||
# BICLs
|
||||
sep_BICL <- sum(sep_BiSBM$BICL)
|
||||
iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
|
||||
|
|
@ -318,7 +318,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
)
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", s, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
|
|
@ -326,7 +326,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
|
||||
saveRDS(object = data_frame_output, file = temp_file_save)
|
||||
|
||||
# Saving inhabitual data
|
||||
# Saving inhabitual data
|
||||
if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
|
||||
warning("Incorrect result encountered, saving.")
|
||||
incorrect_filepath <- file.path(temp_dir, paste0(
|
||||
|
|
@ -358,4 +358,4 @@ full_data_frame <- do.call(rbind, results)
|
|||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
message("Finished simulations.")
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])
|
|||
|
||||
conditions <- conditions[choosed_conditions, ]
|
||||
|
||||
# Data params
|
||||
# Data params
|
||||
main_dir <- file.path("code", "results", "simulations", "inference", "poisson")
|
||||
|
||||
if (!dir.exists(main_dir)) {
|
||||
|
|
@ -204,7 +204,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
|
||||
stop_time_condition <- Sys.time()
|
||||
|
||||
## Preparing date for export
|
||||
## Preparing date for export
|
||||
# BICLs
|
||||
sep_BICL <- sum(sep_BiSBM$BICL)
|
||||
iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
|
||||
|
|
@ -317,7 +317,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
)
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", s, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
|
|
@ -325,7 +325,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
|
|||
|
||||
saveRDS(object = data_frame_output, file = temp_file_save)
|
||||
|
||||
# Saving inhabitual data
|
||||
# Saving inhabitual data
|
||||
if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
|
||||
warning("Incorrect result encountered, saving.")
|
||||
incorrect_filepath <- file.path(temp_dir, paste0(
|
||||
|
|
@ -357,4 +357,4 @@ full_data_frame <- do.call(rbind, results)
|
|||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
message("Finished simulations.")
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) {
|
|||
choosed_conditions <- seq.int(from = arg[1], to = arg[2])
|
||||
conditions <- conditions[choosed_conditions, ]
|
||||
|
||||
# Data params
|
||||
# Data params
|
||||
main_dir <- file.path("code", "results", "simulations", "model_selection")
|
||||
|
||||
if (!dir.exists(main_dir)) {
|
||||
|
|
@ -246,7 +246,7 @@ with_progress({
|
|||
)
|
||||
message("Finished step ", s, "/", nrow(conditions))
|
||||
|
||||
# Saving temp
|
||||
# Saving temp
|
||||
temp_file_save <- file.path(temp_dir, paste0(
|
||||
"conditions_", s, "_on_",
|
||||
nrow(conditions), ".Rds"
|
||||
|
|
@ -264,4 +264,4 @@ full_data_frame <- do.call(rbind, results)
|
|||
saveRDS(full_data_frame,
|
||||
file = file_save
|
||||
)
|
||||
message("Finished simulations.")
|
||||
message("Finished simulations.")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue