This commit is contained in:
Louis Lacoste 2023-07-08 18:28:43 +02:00
parent fff1998904
commit 2f561d57ce
19 changed files with 73 additions and 144 deletions

View file

@ -1,5 +1,6 @@
```{r libraries, echo = FALSE, include = FALSE}
require("ggplot2")
require("ggokabeito")
require("tidyr")
require("dplyr")
require("patchwork")
@ -10,119 +11,27 @@ require("latex2exp")
```{r impoting-data, echo = FALSE}
filenames <- list.files(
path = "./Rcodes/simulation/data",
pattern = "simulated_collection_clustering_*",
path = "./data",
pattern = "simulated_collection_data_clustering_*",
full.names = TRUE
)
rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
data_list <- lapply(filenames, readRDS)
df_netclust <- do.call("rbind", lapply(filenames, readRDS))
for (model in data_list) {
list_clustering <- lapply(
seq_along(model), function(s) model[[s]]$list_of_clusterings
)
list_best_partition <- lapply(
seq_along(list_clustering), function(s) {
list(
epsilon = result_clustering[[s]]$epsilon,
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
)
}
)
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx,
best_partition$best_partition[[idx]]$M),
best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
}
lapply(
data_list,
function(model) {
list_clustering <- lapply(
seq_along(model), function(s) model[[s]]$list_of_clusterings
)
list_best_partition <- lapply(
seq_along(list_clustering), function(s) {
list(
epsilon = result_clustering[[s]]$epsilon,
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
)
}
)
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
}
)
data.frame(
epsilon = sapply(
list_best_partition,
function(best_partition) {
best_partition$epsilon
}
),
ARI = sapply(
list_best_partition,
function(best_partition) {
aricode::ARI(rep(1:3, each = 3), unlist(
lapply(
seq_along(best_partition$best_partition),
function(idx) {
setNames(
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
)
}
)
))
}
)
)
```
```{r netclustering-ARI-boxplot, echo = FALSE}
#| dpi = 300,
#| fig.asp = 0.5,
#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
df_netclust %>%
ggplot() +
aes(x = as.factor(epsilon), y = ARI) +
scale_color_okabe_ito() +
scale_fill_okabe_ito() +
xlab(TeX("$\\epsilon$")) +
guides(fill = guide_legend(title = "Model")) +
ylab("ARI of obtained netclustering") +
geom_boxplot(aes(fill = model))
```

View file

@ -1,13 +1,8 @@
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
\begin{verbatim}
## character(0)
\end{verbatim}
\begin{verbatim}
## list()
\end{verbatim}
\begin{verbatim}
## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
\end{verbatim}
\begin{figure}
\centering
\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
\caption{\label{}ARI of the partition obtained by clustering in function
of \(\eps\)}
\end{figure}

View file

@ -1,7 +1,8 @@
require("ggplot2")
require("tictoc")
require("colSBM")
devtools::load_all("R/")
set.seed(1234)
# Generation of conditions
if (!exists("model_to_test")) {
@ -17,7 +18,7 @@ nc <- 75
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
epsilons <- c(0.4)
epsilons <- seq(0.1, 0.4, by = 0.1)
if (!exists("arg")) {
arg <- commandArgs(trailingOnly = TRUE)
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
results <- lapply(seq_len(nrow(conditions)), function(s) {
eps <- conditions[s, ]$epsilons
current_pi <- conditions[s, ]$pi
current_rho <- conditions[s, ]$rho
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc,
current_pi, current_rho,
alpha_assortative, 3,
model = model_to_test
model = model_to_test,
return_memberships = TRUE
)
assortative_incidence <- lapply(
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc,
current_pi, current_rho,
alpha_core_periphery, 3,
model = model_to_test
model = model_to_test,
return_memberships = TRUE
)
core_periphery_incidence <- lapply(
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
nr, nc,
current_pi, current_rho,
alpha_disassortative, 3,
model = model_to_test
model = model_to_test,
return_memberships = TRUE
)
disassortative_incidence <- lapply(
@ -189,26 +193,35 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
),
silent_parallelization = TRUE
)
toc()
return(
list(
epsilon = eps,
repetitions = repetitions,
list_of_clusterings = list_collection,
real_block_memberships = list(
row = real_row_clustering,
col = real_col_clustering
best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
setNames(
rep(col_idx, best_partitions[[col_idx]]$M),
best_partitions[[col_idx]]$net_id
)
))
},
mc.cores = parallel::detectCores() - 1,
mc.progress = TRUE,
mc.retry = -1
}))
# ARI computation
clustering <- clustering[order(names(clustering))]
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
toc()
cat(paste("Finished", s))
return(
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
)
}
# ,
# mc.cores = parallel::detectCores() - 1,
# mc.progress = TRUE,
# mc.retry = -1
)
saveRDS(results, file = paste0(
data_frame_result <- do.call("rbind", results)
saveRDS(data_frame_result, file = paste0(
"simulation/data/",
"simulated_collection_clustering_",
"simulated_collection_data_clustering_",
model_to_test, "_",
format(Sys.time(), "%d-%m-%y-%X"),
".Rds"

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

View file

@ -32,6 +32,9 @@
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\usepackage{booktabs}
% Figure placement
\floatplacement{figure}{H}
%% Tikz Related
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
\usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
\chapter{Simulation studies}\label{chap:simulation-studies}
The below simulations are meant to test the capacities of our models.
We assess the inference capacities of the algorithm and method, the model
selection performances and the clustering capacities.
\paragraph{Reproducibility} All the codes used to obtain data and to perform the
analysis can be found on the report repository at
\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
\include{Rcodes/simulation/inference_analyze}
\include{Rcodes/simulation/model_selection_analyze}
\include{Rcodes/simulation/netclustering_analyze}