Cleaning repo and continue working and fixing netclust inference
This commit is contained in:
parent
2f561d57ce
commit
bd6a12b8df
22 changed files with 135 additions and 3724 deletions
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -227,7 +227,7 @@ kable(proportion_preferred_table,
|
|||
#| fig.height = 4,
|
||||
#| dpi=300
|
||||
|
||||
proportion_preferred_data %>% ggplot() +
|
||||
plot <- proportion_preferred_data %>% ggplot() +
|
||||
aes(
|
||||
x = epsilon_alpha, y = prop_model, color = preferred_model,
|
||||
fill = preferred_model
|
||||
|
|
@ -237,12 +237,12 @@ proportion_preferred_data %>% ggplot() +
|
|||
color = guide_legend(title = "Preferred Model")
|
||||
) +
|
||||
scale_x_continuous(breaks = seq(from = 0.0, to = 0.24, by = 0.03)) +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
xlab(TeX("$\\epsilon_{\\alpha}$")) +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
xlab(TeX("$\\epsilon_{\\alpha}$")) +
|
||||
ylab("Model proportions") +
|
||||
geom_col(position = "stack")
|
||||
|
||||
print(plot)
|
||||
```
|
||||
|
||||
\paragraph{Results} For the model comparison, when $\eps[\alpha]$ is small
|
||||
|
|
|
|||
|
|
@ -18,8 +18,62 @@ filenames <- list.files(
|
|||
|
||||
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
|
||||
df_netclust <- do.call("rbind", lapply(filenames, readRDS))
|
||||
df_netclust$model <- factor(df_netclust$model, levels = c(
|
||||
"iid", "pi",
|
||||
"rho", "pirho"
|
||||
))
|
||||
|
||||
```
|
||||
\paragraph{Simulation settings} For all models we simulate $M = 9$ networks with
|
||||
$n^m_1 = n^m_2 = 75$ for all $m \in \{1, \dots, M\}$, and $Q_1 = Q_2 = 3$. For
|
||||
the simulations the proportions are the following:
|
||||
|
||||
\begin{align*}
|
||||
\bm{\pi}^1 = \left( 0.2, 0.3, 0.5 \right) & & \bm{\rho}^1 = \left( 0.2, 0.3, 0.5 \right)
|
||||
\end{align*}
|
||||
and for all $m = 2,\dots,9$
|
||||
\begin{align*}
|
||||
\bm{\pi}^m = \begin{cases}
|
||||
\bm{\pi}^1 & \text{for } iid\text{-}colBiSBM \text{ and } \rho\text{-}colBiSBM \\
|
||||
\sigma^1_m(\bm{\pi}^1) & \text{for } \pi\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM
|
||||
\end{cases}\\
|
||||
\bm{\rho}^m =
|
||||
\begin{cases}
|
||||
\bm{\rho}^1 & \text{for } iid\text{-}colBiSBM \text{ and } \pi\text{-}colBiSBM \\
|
||||
\sigma^2_m(\bm{\rho}^1) & \text{for } \rho\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM
|
||||
\end{cases}
|
||||
\end{align*}
|
||||
where $\sigma^1_m$ and $\sigma^2_m$ are permutations of $\{1, 2, 3\}$ specific to network $m$ and
|
||||
$\sigma^1 (\pi)= {(\pi_{\sigma^1 (i)})}_{i \in \{1,\dots,3\}}$
|
||||
and $\sigma^2 (\rho)= {(\rho_{\sigma^2 (i)})}_{i \in \{1,\dots,3\}}$.
|
||||
The networks are divided into 3 sub-collections of 3
|
||||
networks with connectivity parameters as follows:
|
||||
|
||||
\begin{align*}
|
||||
\bm{\alpha}^{as} = .3 + \begin{pmatrix}
|
||||
\epsilon & - \frac{\epsilon}{2} & - \frac{\epsilon}{2}\\
|
||||
- \frac{\epsilon}{2} & \epsilon & - \frac{\epsilon}{2}\\
|
||||
- \frac{\epsilon}{2} & - \frac{\epsilon}{2} & \epsilon
|
||||
\end{pmatrix}, &&
|
||||
\bm{\alpha}^{cp} = .3 + \begin{pmatrix}
|
||||
\frac{3 \epsilon}{2} & \epsilon & \frac{\epsilon}{2}\\
|
||||
\epsilon & \frac{\epsilon}{2} & 0\\
|
||||
\frac{\epsilon}{2} & 0 & - \frac{\epsilon}{2}
|
||||
\end{pmatrix}, &&
|
||||
\bm{\alpha}^{dis} = .3 + \begin{pmatrix}
|
||||
- \frac{\epsilon}{2} & \epsilon & \epsilon\\
|
||||
\epsilon & - \frac{\epsilon}{2} & \epsilon\\
|
||||
\epsilon & \epsilon & - \frac{\epsilon}{2}
|
||||
\end{pmatrix},
|
||||
\end{align*}
|
||||
with $\epsilon \in [.1, .4]$. $\bm{\alpha}^{as}$ represents a classical
|
||||
assortative community structure,
|
||||
while $\bm{\alpha}^{cp}$ is a layered core-periphery structure with block 2
|
||||
acting as a semi-core. Finally, $\bm{\alpha}^{dis}$ is a disassortative
|
||||
community structure with stronger
|
||||
connections between blocks than within blocks. If $\epsilon = 0$, the three
|
||||
matrices are equal and the 9 networks have the same connection structure.
|
||||
Increasing $\epsilon$ differentiates the 3 sub-collections of networks.
|
||||
|
||||
```{r netclustering-ARI-boxplot, echo = FALSE}
|
||||
#| dpi = 300,
|
||||
|
|
@ -34,4 +88,10 @@ df_netclust %>%
|
|||
guides(fill = guide_legend(title = "Model")) +
|
||||
ylab("ARI of obtained netclustering") +
|
||||
geom_boxplot(aes(fill = model))
|
||||
```
|
||||
```
|
||||
|
||||
\paragraph{Results} The evaluation of our method involves a comparison between
|
||||
the resulting partition of the network collection and the simulated partition
|
||||
using the ARI index. As the value of $\epsilon$ increases, our ability to
|
||||
distinguish between the networks improves, and this distinction becomes nearly
|
||||
perfect in all setups of the $colBiSBM$.
|
||||
|
|
@ -1,8 +1,67 @@
|
|||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||
|
||||
\paragraph{Simulation settings}
|
||||
|
||||
For all models we simulate \(M = 9\) networks with
|
||||
\(\forall m \in \{ 1 \dots M \} , n^m_1 = n^m_2 = 75\) with
|
||||
\(Q_1 = Q_2 = 3\). For the simulations the proportions are the
|
||||
following:
|
||||
|
||||
\begin{align*}
|
||||
\bm{\pi}^1 = \left( 0.2, 0.3, 0.5 \right) & & \bm{\rho}^1 = \left( 0.2, 0.3, 0.5 \right)
|
||||
\end{align*} and for all \(m = 2,\dots,9\) \begin{align*}
|
||||
\bm{\pi}^m = \begin{cases}
|
||||
\bm{\pi}^1 & \text{for } iid\text{-}colBiSBM \\
|
||||
\sigma^1_m(\bm{\pi}^1) & \text{for } \pi\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM
|
||||
\end{cases}\\
|
||||
\bm{\rho}^m =
|
||||
\begin{cases}
|
||||
\bm{\rho}^1 & \text{for } iid\text{-}colBiSBM \\
|
||||
\sigma^2_m(\bm{\rho}^1) & \text{for } \rho\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM
|
||||
\end{cases}
|
||||
\end{align*} where \(\sigma^1_m\) and \(\sigma^2_m\) are permutations of
|
||||
\{1, 2, 3\} proper to network \(m\) and
|
||||
\(\sigma^1 (\pi)= {(\pi_{\sigma^1 (i)})}_{i=\{1,\dots,3\}}\) and
|
||||
\(\sigma^2 (\rho)= {(\rho_{\sigma^2 (i)})}_{i=\{1,\dots,3\}}\). The
|
||||
networks are divided into 3 sub-collections of 3 networks with
|
||||
connectivity parameters as follows:
|
||||
|
||||
\begin{align*}
|
||||
\bm{\alpha}^{as} = .3 + \begin{pmatrix}
|
||||
\epsilon & - \frac{\epsilon}{2} & - \frac{\epsilon}{2}\\
|
||||
- \frac{\epsilon}{2} & \epsilon & - \frac{\epsilon}{2}\\
|
||||
- \frac{\epsilon}{2} & - \frac{\epsilon}{2} & \epsilon
|
||||
\end{pmatrix}, &&
|
||||
\bm{\alpha}^{cp} = .3 + \begin{pmatrix}
|
||||
\frac{3 \epsilon}{2} & \epsilon & \frac{\epsilon}{2}\\
|
||||
\epsilon & \frac{\epsilon}{2} & 0\\
|
||||
\frac{\epsilon}{2} & 0 & - \frac{\epsilon}{2}
|
||||
\end{pmatrix}, &&
|
||||
\bm{\alpha}^{dis} = .3 + \begin{pmatrix}
|
||||
- \frac{\epsilon}{2} & \epsilon & \epsilon\\
|
||||
\epsilon & - \frac{\epsilon}{2} & \epsilon\\
|
||||
\epsilon & \epsilon & - \frac{\epsilon}{2}
|
||||
\end{pmatrix},
|
||||
\end{align*} with \(\epsilon \in [.1, .4]\). \(\bm{\alpha}^{as}\)
|
||||
represents a classical assortative community structure, while
|
||||
\(\bm{\alpha}^{cp}\) is a layered core-periphery structure with block 2
|
||||
acting as a semi-core. Finally, \(\bm{\alpha}^{dis}\) is a
|
||||
disassortative community structure with stronger connections between
|
||||
blocks than within blocks. If \(\epsilon = 0\), the three matrices are
|
||||
equal and the 9 networks have the same connection structure. Increasing
|
||||
\(\epsilon\) differentiates the 3 sub-collections of networks.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{./img/99d363f6aa43bf0eba413cb994dc00b130709107.png}
|
||||
\includegraphics{./img/ca0adc96e26b9b41eb8dec4c472696309ebcf0fe.png}
|
||||
\caption{\label{}ARI of the partition obtained by clustering as a function
|
||||
of \(\eps\)}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Results}
|
||||
|
||||
The evaluation of our method involves a comparison between the resulting
|
||||
partition of the network collection and the simulated partition using
|
||||
the ARI index. As the value of \(\epsilon\) increases, our ability to
|
||||
distinguish between the networks improves, and this distinction becomes
|
||||
nearly perfect in all setups of the \(colBiSBM\).
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ if (!exists("model_to_test")) {
|
|||
}
|
||||
|
||||
if (!exists("repetitions")) {
|
||||
repetitions <- seq.int(3)
|
||||
repetitions <- seq.int(30)
|
||||
}
|
||||
|
||||
nr <- 75
|
||||
|
|
@ -32,7 +32,7 @@ if (identical(arg, character(0))) {
|
|||
|
||||
conditions <- tidyr::crossing(epsilons, pi, rho, repetitions)
|
||||
|
||||
results <- lapply(seq_len(nrow(conditions)), function(s) {
|
||||
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
|
||||
eps <- conditions[s, ]$epsilons
|
||||
current_pi <- conditions[s, ]$pi
|
||||
current_rho <- conditions[s, ]$rho
|
||||
|
|
@ -195,6 +195,9 @@ results <- lapply(seq_len(nrow(conditions)), function(s) {
|
|||
)
|
||||
|
||||
best_partitions <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
if (!is(best_partitions, "list")) {
|
||||
best_partitions <- list(best_partitions)
|
||||
}
|
||||
clustering <- unlist(lapply(seq_along(best_partitions), function(col_idx) {
|
||||
setNames(
|
||||
rep(col_idx, best_partitions[[col_idx]]$M),
|
||||
|
|
@ -206,15 +209,13 @@ results <- lapply(seq_len(nrow(conditions)), function(s) {
|
|||
ari <- aricode::ARI(rep(c(1, 2, 3), each = 3), clustering)
|
||||
|
||||
toc()
|
||||
cat(paste("Finished", s))
|
||||
return(
|
||||
data.frame(epsilon = eps, model = model_to_test, ARI = ari)
|
||||
)
|
||||
}
|
||||
# ,
|
||||
# mc.cores = parallel::detectCores() - 1,
|
||||
# mc.progress = TRUE,
|
||||
# mc.retry = -1
|
||||
},
|
||||
mc.cores = parallel::detectCores() - 1,
|
||||
mc.progress = TRUE,
|
||||
mc.retry = -1
|
||||
)
|
||||
|
||||
data_frame_result <- do.call("rbind", results)
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 49 KiB After Width: | Height: | Size: 58 KiB |
BIN
img/6a5c3c2748922aace8a2034349434383ce4a9f11.png
Normal file
BIN
img/6a5c3c2748922aace8a2034349434383ce4a9f11.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 58 KiB |
BIN
img/ca0adc96e26b9b41eb8dec4c472696309ebcf0fe.png
Normal file
BIN
img/ca0adc96e26b9b41eb8dec4c472696309ebcf0fe.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 58 KiB |
BIN
img/d424b38c3b69ae646295e877eee9ae4e8602ec6c.png
Normal file
BIN
img/d424b38c3b69ae646295e877eee9ae4e8602ec6c.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 86 KiB |
BIN
rapport.pdf
BIN
rapport.pdf
Binary file not shown.
Loading…
Add table
Reference in a new issue