WOD 8/7
|
|
@ -1,5 +1,6 @@
|
||||||
```{r libraries, echo = FALSE, include = FALSE}
|
```{r libraries, echo = FALSE, include = FALSE}
|
||||||
require("ggplot2")
|
require("ggplot2")
|
||||||
|
require("ggokabeito")
|
||||||
require("tidyr")
|
require("tidyr")
|
||||||
require("dplyr")
|
require("dplyr")
|
||||||
require("patchwork")
|
require("patchwork")
|
||||||
|
|
@ -10,119 +11,27 @@ require("latex2exp")
|
||||||
|
|
||||||
```{r impoting-data, echo = FALSE}
|
```{r impoting-data, echo = FALSE}
|
||||||
filenames <- list.files(
|
filenames <- list.files(
|
||||||
path = "./Rcodes/simulation/data",
|
path = "./data",
|
||||||
pattern = "simulated_collection_clustering_*",
|
pattern = "simulated_collection_data_clustering_*",
|
||||||
full.names = TRUE
|
full.names = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
rep(gsub(".*_(iid|pi|rho|pirho)_.*", "\\1", filenames), each = 6)
|
|
||||||
|
|
||||||
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
|
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
|
||||||
data_list <- lapply(filenames, readRDS)
|
df_netclust <- do.call("rbind", lapply(filenames, readRDS))
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
for (model in data_list) {
|
```{r netclustering-ARI-boxplot, echo = FALSE}
|
||||||
list_clustering <- lapply(
|
#| dpi = 300,
|
||||||
seq_along(model), function(s) model[[s]]$list_of_clusterings
|
#| fig.asp = 0.5,
|
||||||
)
|
#| fig.cap = "\\label{}ARI of the partition obtained by clustering in function of $\\eps$"
|
||||||
|
df_netclust %>%
|
||||||
list_best_partition <- lapply(
|
ggplot() +
|
||||||
seq_along(list_clustering), function(s) {
|
aes(x = as.factor(epsilon), y = ARI) +
|
||||||
list(
|
scale_color_okabe_ito() +
|
||||||
epsilon = result_clustering[[s]]$epsilon,
|
scale_fill_okabe_ito() +
|
||||||
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
|
xlab(TeX("$\\epsilon$")) +
|
||||||
)
|
guides(fill = guide_legend(title = "Model")) +
|
||||||
}
|
ylab("ARI of obtained netclustering") +
|
||||||
)
|
geom_boxplot(aes(fill = model))
|
||||||
data.frame(
|
|
||||||
epsilon = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
best_partition$epsilon
|
|
||||||
}
|
|
||||||
),
|
|
||||||
ARI = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
|
||||||
lapply(
|
|
||||||
seq_along(best_partition$best_partition),
|
|
||||||
function(idx) {
|
|
||||||
setNames(
|
|
||||||
rep(idx,
|
|
||||||
best_partition$best_partition[[idx]]$M),
|
|
||||||
best_partition$best_partition[[idx]]$net_id
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
))
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
lapply(
|
|
||||||
data_list,
|
|
||||||
function(model) {
|
|
||||||
list_clustering <- lapply(
|
|
||||||
seq_along(model), function(s) model[[s]]$list_of_clusterings
|
|
||||||
)
|
|
||||||
|
|
||||||
list_best_partition <- lapply(
|
|
||||||
seq_along(list_clustering), function(s) {
|
|
||||||
list(
|
|
||||||
epsilon = result_clustering[[s]]$epsilon,
|
|
||||||
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
data.frame(
|
|
||||||
epsilon = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
best_partition$epsilon
|
|
||||||
}
|
|
||||||
),
|
|
||||||
ARI = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
|
||||||
lapply(
|
|
||||||
seq_along(best_partition$best_partition),
|
|
||||||
function(idx) {
|
|
||||||
setNames(
|
|
||||||
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
))
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
}
|
|
||||||
)
|
|
||||||
data.frame(
|
|
||||||
epsilon = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
best_partition$epsilon
|
|
||||||
}
|
|
||||||
),
|
|
||||||
ARI = sapply(
|
|
||||||
list_best_partition,
|
|
||||||
function(best_partition) {
|
|
||||||
aricode::ARI(rep(1:3, each = 3), unlist(
|
|
||||||
lapply(
|
|
||||||
seq_along(best_partition$best_partition),
|
|
||||||
function(idx) {
|
|
||||||
setNames(
|
|
||||||
rep(idx, best_partition$best_partition[[idx]]$M), best_partition$best_partition[[idx]]$net_id
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
))
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
@ -1,13 +1,8 @@
|
||||||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{figure}
|
||||||
## character(0)
|
\centering
|
||||||
\end{verbatim}
|
\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
|
||||||
|
\caption{\label{}ARI of the partition obtained by clustering in function
|
||||||
\begin{verbatim}
|
of \(\eps\)}
|
||||||
## list()
|
\end{figure}
|
||||||
\end{verbatim}
|
|
||||||
|
|
||||||
\begin{verbatim}
|
|
||||||
## Error in eval(expr, envir, enclos): objet 'list_best_partition' introuvable
|
|
||||||
\end{verbatim}
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
require("ggplot2")
|
require("ggplot2")
|
||||||
require("tictoc")
|
require("tictoc")
|
||||||
|
require("colSBM")
|
||||||
|
|
||||||
devtools::load_all("R/")
|
set.seed(1234)
|
||||||
|
|
||||||
# Generation of conditions
|
# Generation of conditions
|
||||||
if (!exists("model_to_test")) {
|
if (!exists("model_to_test")) {
|
||||||
|
|
@ -17,7 +18,7 @@ nc <- 75
|
||||||
|
|
||||||
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
pi <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
||||||
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
rho <- matrix(c(0.2, 0.3, 0.5), nrow = 1, byrow = TRUE)
|
||||||
epsilons <- c(0.4)
|
epsilons <- seq(0.1, 0.4, by = 0.1)
|
||||||
|
|
||||||
if (!exists("arg")) {
|
if (!exists("arg")) {
|
||||||
arg <- commandArgs(trailingOnly = TRUE)
|
arg <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
@ -31,7 +32,7 @@ if (identical(arg, character(0))) {
|
||||||
|
|
||||||
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
|
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
|
||||||
|
|
||||||
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
results <- lapply(seq_len(nrow(conditions)), function(s) {
|
||||||
eps <- conditions[s, ]$epsilons
|
eps <- conditions[s, ]$epsilons
|
||||||
current_pi <- conditions[s, ]$pi
|
current_pi <- conditions[s, ]$pi
|
||||||
current_rho <- conditions[s, ]$rho
|
current_rho <- conditions[s, ]$rho
|
||||||
|
|
@ -70,7 +71,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||||
nr, nc,
|
nr, nc,
|
||||||
current_pi, current_rho,
|
current_pi, current_rho,
|
||||||
alpha_assortative, 3,
|
alpha_assortative, 3,
|
||||||
model = model_to_test
|
model = model_to_test,
|
||||||
|
return_memberships = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
assortative_incidence <- lapply(
|
assortative_incidence <- lapply(
|
||||||
|
|
@ -98,7 +100,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||||
nr, nc,
|
nr, nc,
|
||||||
current_pi, current_rho,
|
current_pi, current_rho,
|
||||||
alpha_core_periphery, 3,
|
alpha_core_periphery, 3,
|
||||||
model = model_to_test
|
model = model_to_test,
|
||||||
|
return_memberships = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
core_periphery_incidence <- lapply(
|
core_periphery_incidence <- lapply(
|
||||||
|
|
@ -126,7 +129,8 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||||
nr, nc,
|
nr, nc,
|
||||||
current_pi, current_rho,
|
current_pi, current_rho,
|
||||||
alpha_disassortative, 3,
|
alpha_disassortative, 3,
|
||||||
model = model_to_test
|
model = model_to_test,
|
||||||
|
return_memberships = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
disassortative_incidence <- lapply(
|
disassortative_incidence <- lapply(
|
||||||
|
|
@ -184,31 +188,40 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||||
colsbm_model = model_to_test,
|
colsbm_model = model_to_test,
|
||||||
global_opts = list(
|
global_opts = list(
|
||||||
nb_cores = parallel::detectCores() - 1, verbosity = 2,
|
nb_cores = parallel::detectCores() - 1, verbosity = 2,
|
||||||
plot_details = 0#,
|
plot_details = 0 # ,
|
||||||
#parallelization_vector = c(FALSE, FALSE, FALSE)
|
# parallelization_vector = c(FALSE, FALSE, FALSE)
|
||||||
),
|
),
|
||||||
silent_parallelization = TRUE
|
silent_parallelization = TRUE
|
||||||
)
|
)
|
||||||
toc()
|
|
||||||
return(
|
best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
|
||||||
list(
|
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
|
||||||
epsilon = eps,
|
setNames(
|
||||||
repetitions = repetitions,
|
rep(col_idx, best_partitions[[col_idx]]$M),
|
||||||
list_of_clusterings = list_collection,
|
best_partitions[[col_idx]]$net_id
|
||||||
real_block_memberships = list(
|
|
||||||
row = real_row_clustering,
|
|
||||||
col = real_col_clustering
|
|
||||||
)
|
)
|
||||||
))
|
}))
|
||||||
},
|
# ARI computation
|
||||||
mc.cores = parallel::detectCores() - 1,
|
clustering <- clustering[order(names(clustering))]
|
||||||
mc.progress = TRUE,
|
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
|
||||||
mc.retry = -1
|
|
||||||
|
toc()
|
||||||
|
cat(paste("Finished", s))
|
||||||
|
return(
|
||||||
|
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
# ,
|
||||||
|
# mc.cores = parallel::detectCores() - 1,
|
||||||
|
# mc.progress = TRUE,
|
||||||
|
# mc.retry = -1
|
||||||
)
|
)
|
||||||
|
|
||||||
saveRDS(results, file = paste0(
|
data_frame_result <- do.call("rbind", results)
|
||||||
|
|
||||||
|
saveRDS(data_frame_result, file = paste0(
|
||||||
"simulation/data/",
|
"simulation/data/",
|
||||||
"simulated_collection_clustering_",
|
"simulated_collection_data_clustering_",
|
||||||
model_to_test, "_",
|
model_to_test, "_",
|
||||||
format(Sys.time(), "%d-%m-%y-%X"),
|
format(Sys.time(), "%d-%m-%y-%X"),
|
||||||
".Rds"
|
".Rds"
|
||||||
|
|
|
||||||
BIN
figure/netclustering-ARI-boxplot-1.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
img/353fe7be5055ae41407f1c89793ea74014c17c8b.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
img/399307245baa20546f626fa1394d31e9a0eb9241.png
Normal file
|
After Width: | Height: | Size: 23 KiB |
BIN
img/3ba92635cedfbf1fefd0d2078f39b33566d8d598.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
img/5164fdb9ce3a2342098367d2af256fced27269f5.png
Normal file
|
After Width: | Height: | Size: 72 KiB |
BIN
img/99d363f6aa43bf0eba413cb994dc00b130709107.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
img/d6ef94a0891ef7aa3c0b44ae71dfdacb8a5d3114.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
img/e1edd7a345d455cbbd86f793df106686f91e258b.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
rapport.pdf
12
rapport.tex
|
|
@ -32,6 +32,9 @@
|
||||||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
||||||
\usepackage{booktabs}
|
\usepackage{booktabs}
|
||||||
|
|
||||||
|
% Figure placement
|
||||||
|
\floatplacement{figure}{H}
|
||||||
|
|
||||||
%% Tikz Related
|
%% Tikz Related
|
||||||
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
|
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
|
||||||
\usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
|
\usetikzlibrary{arrows,shapes,positioning,shadows,trees,calc,backgrounds,automata,positioning}
|
||||||
|
|
@ -1060,6 +1063,15 @@ We illustrate our capacity to perform a partition of a collection for all
|
||||||
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
|
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
|
||||||
|
|
||||||
\chapter{Simulation studies}\label{chap:simulation-studies}
|
\chapter{Simulation studies}\label{chap:simulation-studies}
|
||||||
|
|
||||||
|
The below simulations are meant to test the capacities of our models.
|
||||||
|
We assess the inference capacities of the algorithm and method, the model
|
||||||
|
selection performances and the clustering capacities.
|
||||||
|
|
||||||
|
\paragraph{Reproducibility} All the codes used to obtain data and to perform the
|
||||||
|
analysis can be found on the report repository at
|
||||||
|
\url{https://gitea.polarolouis.fr/polarolouis/rapport-CEI-MIA-2023}.
|
||||||
|
|
||||||
\include{Rcodes/simulation/inference_analyze}
|
\include{Rcodes/simulation/inference_analyze}
|
||||||
\include{Rcodes/simulation/model_selection_analyze}
|
\include{Rcodes/simulation/model_selection_analyze}
|
||||||
\include{Rcodes/simulation/netclustering_analyze}
|
\include{Rcodes/simulation/netclustering_analyze}
|
||||||
|
|
|
||||||