This commit is contained in:
Louis Lacoste 2023-07-08 18:28:43 +02:00
parent fff1998904
commit 2f561d57ce
19 changed files with 73 additions and 144 deletions

View file

@ -1,5 +1,6 @@
```{r libraries, echo = FALSE, include = FALSE} ```{r libraries, echo = FALSE, include = FALSE}
require("ggplot2") require("ggplot2")
require("ggokabeito")
require("tidyr") require("tidyr")
require("dplyr") require("dplyr")
require("patchwork") require("patchwork")
@ -10,119 +11,27 @@ require("latex2exp")
```{r impoting-data, echo = FALSE} ```{r impoting-data, echo = FALSE}
filenames <- list.files( filenames <- list.files(
path = "./Rcodes/simulation/data", path = "./data",
pattern = "simulated_collection_clustering_*", pattern = "simulated_collection_data_clustering_*",
full.names = TRUE full.names = TRUE
) )
rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering)) # data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
data_list <- lapply(filenames, readRDS) df_netclust <- do.call("rbind", lapply(filenames, readRDS))
```
for (model in data_list) { ```{r netclustering-ARI-boxplot, echo = FALSE}
list_clustering <- lapply( #| dpi = 300,
seq_along(model), function(s) model[[s]]$list_of_clusterings #| fig.asp = 0.5,
) #| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
df_netclust %>%
list_best_partition <- lapply( ggplot() +
seq_along(list_clustering), function(s) { aes(x = as.factor(epsilon), y = ARI) +
list( scale_color_okabe_ito() +
epsilon = result_clustering[[s]]$epsilon, scale_fill_okabe_ito() +
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]])) xlab(TeX("$\\epsilon$")) +
) guides(fill = guide_legend(title = "Model")) +
} ylab("ARI of obtained netclustering") +
) geom_boxplot(aes(fill = model))
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx,
best_partition$best_partition[[idx]]$M),
best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
}
lapply(
data_list,
function(model) {
list_clustering <- lapply(
seq_along(model), function(s) model[[s]]$list_of_clusterings
)
list_best_partition <- lapply(
seq_along(list_clustering), function(s) {
list(
epsilon = result_clustering[[s]]$epsilon,
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
)
}
)
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
}
)
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
``` ```

View file

@ -1,13 +1,8 @@
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks} \section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
\begin{verbatim} \begin{figure}
## character(0) \centering
\end{verbatim} \includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
\caption{\label{}ARI of the partition obtained by clustering in function
\begin{verbatim} of \(\eps\)}
## list() \end{figure}
\end{verbatim}
\begin{verbatim}
## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
\end{verbatim}

View file

@ -1,7 +1,8 @@
require("ggplot2") require("ggplot2")
require("tictoc") require("tictoc")
require("colSBM")
devtools::load_all("R/") set.seed(1234)
# Generation of conditions # Generation of conditions
if (!exists("model_to_test")) { if (!exists("model_to_test")) {
@ -17,7 +18,7 @@ nc <- 75
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE) rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
epsilons <- c(0.4) epsilons <- seq(0.1, 0.4, by = 0.1)
if (!exists("arg")) { if (!exists("arg")) {
arg <- commandArgs(trailingOnly = TRUE) arg <- commandArgs(trailingOnly = TRUE)
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions) conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { results <- lapply(seq_len(nrow(conditions)), function(s) {
eps <- conditions[s, ]$epsilons eps <- conditions[s, ]$epsilons
current_pi <- conditions[s, ]$pi current_pi <- conditions[s, ]$pi
current_rho <- conditions[s, ]$rho current_rho <- conditions[s, ]$rho
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc, nr, nc,
current_pi, current_rho, current_pi, current_rho,
alpha_assortative, 3, alpha_assortative, 3,
model = model_to_test model = model_to_test,
return_memberships = TRUE
) )
assortative_incidence <- lapply( assortative_incidence <- lapply(
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc, nr, nc,
current_pi, current_rho, current_pi, current_rho,
alpha_core_periphery, 3, alpha_core_periphery, 3,
model = model_to_test model = model_to_test,
return_memberships = TRUE
) )
core_periphery_incidence <- lapply( core_periphery_incidence <- lapply(
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc, nr, nc,
current_pi, current_rho, current_pi, current_rho,
alpha_disassortative, 3, alpha_disassortative, 3,
model = model_to_test model = model_to_test,
return_memberships = TRUE
) )
disassortative_incidence <- lapply( disassortative_incidence <- lapply(
@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
colsbm_model = model_to_test, colsbm_model = model_to_test,
global_opts = list( global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 2, nb_cores = parallel::detectCores() - 1, verbosity = 2,
plot_details = 0#, plot_details = 0 # ,
#parallelization_vector = c(FALSE, FALSE, FALSE) # parallelization_vector = c(FALSE, FALSE, FALSE)
), ),
silent_parallelization = TRUE silent_parallelization = TRUE
) )
toc()
return( best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
list( clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
epsilon = eps, setNames(
repetitions = repetitions, rep(col_idx, best_partitions[[col_idx]]$M),
list_of_clusterings = list_collection, best_partitions[[col_idx]]$net_id
real_block_memberships = list(
row = real_row_clustering,
col = real_col_clustering
) )
)) }))
}, # ARI computation
mc.cores = parallel::detectCores() - 1, clustering <- clustering[order(names(clustering))]
mc.progress = TRUE, ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
mc.retry = -1
toc()
cat(paste("Finished", s))
return(
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
)
}
# ,
# mc.cores = parallel::detectCores() - 1,
# mc.progress = TRUE,
# mc.retry = -1
) )
saveRDS(results, file = paste0( data_frame_result <- do.call("rbind", results)
saveRDS(data_frame_result, file = paste0(
"simulation/data/", "simulation/data/",
"simulated_collection_clustering_", "simulated_collection_data_clustering_",
model_to_test, "_", model_to_test, "_",
format(Sys.time(), "%d-%m-%y-%X"), format(Sys.time(), "%d-%m-%y-%X"),
".Rds" ".Rds"

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

View file

@ -32,6 +32,9 @@
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\usepackage{booktabs} \usepackage{booktabs}
% Figure placement
\floatplacement{figure}{H}
%% Tikz Related %% Tikz Related
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning} \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
\usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning} \usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}. colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
\chapter{Simulation studies}\label{chap:simulation-studies} \chapter{Simulation studies}\label{chap:simulation-studies}
The below simulations are meant to test the capacities of our models.
We assess the inference capacities of the algorithm and method, the model
selection performances and the clustering capacities.
\paragraph{Reproducibility} All the codes used to obtain data and to perform the
analysis can be found on the report repository at
\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
\include{Rcodes/simulation/inference_analyze} \include{Rcodes/simulation/inference_analyze}
\include{Rcodes/simulation/model_selection_analyze} \include{Rcodes/simulation/model_selection_analyze}
\include{Rcodes/simulation/netclustering_analyze} \include{Rcodes/simulation/netclustering_analyze}