WOD 8/7
|
|
@ -1,5 +1,6 @@
|
|||
```{r libraries, echo = FALSE, include = FALSE}
|
||||
require("ggplot2")
|
||||
require("ggokabeito")
|
||||
require("tidyr")
|
||||
require("dplyr")
|
||||
require("patchwork")
|
||||
|
|
@ -10,119 +11,27 @@ require("latex2exp")
|
|||
|
||||
```{r impoting-data, echo = FALSE}
|
||||
filenames <- list.files(
|
||||
path = "./Rcodes/simulation/data",
|
||||
pattern = "simulated_collection_clustering_*",
|
||||
path = "./data",
|
||||
pattern = "simulated_collection_data_clustering_*",
|
||||
full.names = TRUE
|
||||
)
|
||||
|
||||
rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
|
||||
|
||||
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
|
||||
data_list <- lapply(filenames, readRDS)
|
||||
df_netclust <- do.call("rbind", lapply(filenames, readRDS))
|
||||
|
||||
|
||||
for (model in data_list) {
|
||||
list_clustering <- lapply(
|
||||
seq_along(model), function(s) model[[s]]$list_of_clusterings
|
||||
)
|
||||
|
||||
list_best_partition <- lapply(
|
||||
seq_along(list_clustering), function(s) {
|
||||
list(
|
||||
epsilon = result_clustering[[s]]$epsilon,
|
||||
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
|
||||
)
|
||||
}
|
||||
)
|
||||
data.frame(
|
||||
epsilon = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
best_partition$epsilon
|
||||
}
|
||||
),
|
||||
ARI = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
||||
lapply(
|
||||
seq_along(best_partition$best_partition),
|
||||
function(idx) {
|
||||
setNames(
|
||||
rep(idx,
|
||||
best_partition$best_partition[[idx]]$M),
|
||||
best_partition$best_partition[[idx]]$net_id
|
||||
)
|
||||
}
|
||||
)
|
||||
))
|
||||
}
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
lapply(
|
||||
data_list,
|
||||
function(model) {
|
||||
list_clustering <- lapply(
|
||||
seq_along(model), function(s) model[[s]]$list_of_clusterings
|
||||
)
|
||||
|
||||
list_best_partition <- lapply(
|
||||
seq_along(list_clustering), function(s) {
|
||||
list(
|
||||
epsilon = result_clustering[[s]]$epsilon,
|
||||
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
|
||||
)
|
||||
}
|
||||
)
|
||||
data.frame(
|
||||
epsilon = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
best_partition$epsilon
|
||||
}
|
||||
),
|
||||
ARI = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
||||
lapply(
|
||||
seq_along(best_partition$best_partition),
|
||||
function(idx) {
|
||||
setNames(
|
||||
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
|
||||
)
|
||||
}
|
||||
)
|
||||
))
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
}
|
||||
)
|
||||
data.frame(
|
||||
epsilon = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
best_partition$epsilon
|
||||
}
|
||||
),
|
||||
ARI = sapply(
|
||||
list_best_partition,
|
||||
function(best_partition) {
|
||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
||||
lapply(
|
||||
seq_along(best_partition$best_partition),
|
||||
function(idx) {
|
||||
setNames(
|
||||
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
|
||||
)
|
||||
}
|
||||
)
|
||||
))
|
||||
}
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
```{r netclustering-ARI-boxplot, echo = FALSE}
|
||||
#| dpi = 300,
|
||||
#| fig.asp = 0.5,
|
||||
#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
|
||||
df_netclust %>%
|
||||
ggplot() +
|
||||
aes(x = as.factor(epsilon), y = ARI) +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
xlab(TeX("$\\epsilon$")) +
|
||||
guides(fill = guide_legend(title = "Model")) +
|
||||
ylab("ARI of obtained netclustering") +
|
||||
geom_boxplot(aes(fill = model))
|
||||
```
|
||||
|
|
@ -1,13 +1,8 @@
|
|||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||
|
||||
\begin{verbatim}
|
||||
## character(0)
|
||||
\end{verbatim}
|
||||
|
||||
\begin{verbatim}
|
||||
## list()
|
||||
\end{verbatim}
|
||||
|
||||
\begin{verbatim}
|
||||
## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
|
||||
\end{verbatim}
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
|
||||
\caption{\label{}ARI of the partition obtained by clustering in function
|
||||
of \(\eps\)}
|
||||
\end{figure}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("colSBM")
|
||||
|
||||
devtools::load_all("R/")
|
||||
set.seed(1234)
|
||||
|
||||
# Generation of conditions
|
||||
if (!exists("model_to_test")) {
|
||||
|
|
@ -17,7 +18,7 @@ nc <- 75
|
|||
|
||||
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
||||
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
||||
epsilons <- c(0.4)
|
||||
epsilons <- seq(0.1, 0.4, by = 0.1)
|
||||
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
|
|
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {
|
|||
|
||||
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
|
||||
|
||||
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||
results <- lapply(seq_len(nrow(conditions)), function(s) {
|
||||
eps <- conditions[s, ]$epsilons
|
||||
current_pi <- conditions[s, ]$pi
|
||||
current_rho <- conditions[s, ]$rho
|
||||
|
|
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
|||
nr, nc,
|
||||
current_pi, current_rho,
|
||||
alpha_assortative, 3,
|
||||
model = model_to_test
|
||||
model = model_to_test,
|
||||
return_memberships = TRUE
|
||||
)
|
||||
|
||||
assortative_incidence <- lapply(
|
||||
|
|
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
|||
nr, nc,
|
||||
current_pi, current_rho,
|
||||
alpha_core_periphery, 3,
|
||||
model = model_to_test
|
||||
model = model_to_test,
|
||||
return_memberships = TRUE
|
||||
)
|
||||
|
||||
core_periphery_incidence <- lapply(
|
||||
|
|
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
|||
nr, nc,
|
||||
current_pi, current_rho,
|
||||
alpha_disassortative, 3,
|
||||
model = model_to_test
|
||||
model = model_to_test,
|
||||
return_memberships = TRUE
|
||||
)
|
||||
|
||||
disassortative_incidence <- lapply(
|
||||
|
|
@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
|||
colsbm_model = model_to_test,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 2,
|
||||
plot_details = 0#,
|
||||
#parallelization_vector = c(FALSE, FALSE, FALSE)
|
||||
plot_details = 0 # ,
|
||||
# parallelization_vector = c(FALSE, FALSE, FALSE)
|
||||
),
|
||||
silent_parallelization = TRUE
|
||||
)
|
||||
toc()
|
||||
return(
|
||||
list(
|
||||
epsilon = eps,
|
||||
repetitions = repetitions,
|
||||
list_of_clusterings = list_collection,
|
||||
real_block_memberships = list(
|
||||
row = real_row_clustering,
|
||||
col = real_col_clustering
|
||||
|
||||
best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
|
||||
setNames(
|
||||
rep(col_idx, best_partitions[[col_idx]]$M),
|
||||
best_partitions[[col_idx]]$net_id
|
||||
)
|
||||
))
|
||||
},
|
||||
mc.cores = parallel::detectCores() - 1,
|
||||
mc.progress = TRUE,
|
||||
mc.retry = -1
|
||||
}))
|
||||
# ARI computation
|
||||
clustering <- clustering[order(names(clustering))]
|
||||
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
|
||||
|
||||
toc()
|
||||
cat(paste("Finished", s))
|
||||
return(
|
||||
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
|
||||
)
|
||||
}
|
||||
# ,
|
||||
# mc.cores = parallel::detectCores() - 1,
|
||||
# mc.progress = TRUE,
|
||||
# mc.retry = -1
|
||||
)
|
||||
|
||||
saveRDS(results, file = paste0(
|
||||
data_frame_result <- do.call("rbind", results)
|
||||
|
||||
saveRDS(data_frame_result, file = paste0(
|
||||
"simulation/data/",
|
||||
"simulated_collection_clustering_",
|
||||
"simulated_collection_data_clustering_",
|
||||
model_to_test, "_",
|
||||
format(Sys.time(), "%d-%m-%y-%X"),
|
||||
".Rds"
|
||||
|
|
|
|||
BIN
figure/netclustering-ARI-boxplot-1.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
img/399307245baa20546f626fa1394d31e9a0eb9241.png
Normal file
|
After Width: | Height: | Size: 23 KiB |
BIN
img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
img/5164fdb9ce3a2342098367d2af256fced27269f5.png
Normal file
|
After Width: | Height: | Size: 72 KiB |
BIN
img/99d363f6aa43bf0eba413cb994dc00b130709107.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
img/e1edd7a345d455cbbd86f793df106686f91e258b.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
rapport.pdf
12
rapport.tex
|
|
@ -32,6 +32,9 @@
|
|||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
||||
\usepackage{booktabs}
|
||||
|
||||
% Figure placement
|
||||
\floatplacement{figure}{H}
|
||||
|
||||
%% Tikz Related
|
||||
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
|
||||
\usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
|
||||
|
|
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
|
|||
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
|
||||
|
||||
\chapter{Simulation studies}\label{chap:simulation-studies}
|
||||
|
||||
The below simulations are meant to test the capacities of our models.
|
||||
We assess the inference capacities of the algorithm and method, the model
|
||||
selection performances and the clustering capacities.
|
||||
|
||||
\paragraph{Reproducibility} All the codes used to obtain data and to perform the
|
||||
analysis can be found on the report repository at
|
||||
\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
|
||||
|
||||
\include{Rcodes/simulation/inference_analyze}
|
||||
\include{Rcodes/simulation/model_selection_analyze}
|
||||
\include{Rcodes/simulation/netclustering_analyze}
|
||||
|
|
|
|||