Adding work BOD 7/7/23

This commit is contained in:
Louis Lacoste 2023-07-07 10:44:15 +02:00
parent 37e5b958a2
commit 767f2f86c7
14 changed files with 545 additions and 343 deletions

View file

@ -4,7 +4,7 @@
{ {
"match": ".*\\.Rmd", "match": ".*\\.Rmd",
"isAsync": true, "isAsync": true,
"cmd": "Rscript Rmd2Latex-fragment.R '${file}' " "cmd": "/bin/bash -c \"Rscript Rmd2Latex-fragment.R '${file}'\""
}, },
{ {
"match": ".*", "match": ".*",

View file

@ -0,0 +1,196 @@
```{r libraries, echo = FALSE, include = FALSE}
require("ggplot2")
require("tidyr")
require("dplyr")
require("stringr")
require("knitr")
require("pander")
require("patchwork")
require("latex2exp")
```
```{r setup, echo = FALSE}
# Turn off dplyr's informational message about the grouping of summarise() output.
options(dplyr.summarise.inform = FALSE)
# knitr package option; presumably forces kable() to emit LaTeX tables here — TODO confirm.
knitr::opts_knit$set(kable.force.latex = TRUE)
# Format a numeric vector as "mean $\pm$ standard error" for a LaTeX table cell.
#
# x:   numeric vector; NA entries are ignored.
# ...: unused, accepted so callers may pass extra arguments.
#
# Returns a single character string:
#   - "NA" when the mean or the standard error cannot be computed
#     (no non-missing value, or a single one);
#   - the rounded mean alone when the rounded standard error is 0;
#   - "<mean> $\pm$ <se>" otherwise (both rounded to 2 decimals).
meanse <- function(x, ...) {
  n_obs <- sum(!is.na(x))
  mean1 <- signif(round(mean(x, na.rm = TRUE), 2), 5)
  se1 <- signif(round(sd(x, na.rm = TRUE) / sqrt(n_obs), 2), 2)
  # Test for missing values first: `if (se1 == 0)` on an NA is an error.
  if (is.na(mean1) || is.na(se1)) {
    return("NA")
  }
  # A zero standard error carries no information, so print the mean alone.
  if (se1 == 0) {
    return(paste(mean1))
  }
  paste(mean1, "$\\pm$", se1)
}
```
```{r import-data, echo = FALSE}
# Read every saved simulation result for July 2023 and stack them in one table.
# `pattern` is a regular expression, not a shell glob, so it is anchored at the
# start of the file name instead of ending with a glob-style "*".
filenames <- list.files(
  path = "./data/",
  pattern = "^inference_testing_2023-07-",
  full.names = TRUE
)
data_list <- lapply(filenames, readRDS)
# Column positions of the per-model BIC-L values in the stacked results.
# NOTE(review): hard-coded indices, presumably the "<model>_BICL" columns —
# confirm against the saved data.
col_id_BICLS <- c(11, 16, 23, 30, 37)
result_data_frame <- dplyr::bind_rows(data_list)
# Compute the preferred model: for each row, the model with the largest BIC-L.
# vapply() guarantees one model name per row and fails loudly otherwise.
result_data_frame <- cbind(
  result_data_frame,
  preferred_model = vapply(
    seq_len(nrow(result_data_frame)),
    function(n) {
      sub("_BICL", "", names(which.max(result_data_frame[n, col_id_BICLS])))
    },
    character(1)
  )
)
```
# Efficiency of the inference
\paragraph{Simulation settings} For this simulation the data is simulated with
$M = 2, n_{1}^{m} = 120, n_{2}^{m} = 120, Q_1 = Q_2 = 4$, $\bm{\alpha}, \bm{\pi}$
and $\bm{\rho}$ are set as follows:
\begin{align*}
&&\bm{\alpha} = .25 +
\begin{pmatrix}
3 \eps[\alpha] & 2 \eps[\alpha] & \eps[\alpha] & - \eps[\alpha]\\
2 \eps[\alpha] & 2 \eps[\alpha] & - \eps[\alpha] & \eps[\alpha]\\
\eps[\alpha] & - \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha]\\
- \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha] & 0
\end{pmatrix}, \\ \bm{\pi}^1 = \sigma_1
\begin{pmatrix}
0.2 & 0.4 & 0.4 & 0
\end{pmatrix},
&& \bm{\pi}^2 =
\begin{pmatrix}
0.25 & 0.25 & 0.25 & 0.25
\end{pmatrix}, \\
\bm{\rho}^1 =
\begin{pmatrix}
0.25 & 0.25 & 0.25 & 0.25
\end{pmatrix}, &&
\bm{\rho}^2 = \sigma_2
\begin{pmatrix}
0 & 0.33 & 0.33 & 0.33
\end{pmatrix}, &&
\end{align*}
with $\eps[\alpha]$ taking nine equally spaced values ranging from 0 to 0.24.
For each value of $\eps[\alpha]$, 108 datasets ($X_1, X_2$) are simulated,
resulting in $9 \times 108 = 972$ datasets. More precisely, for each dataset,
we pick uniformly at random two permutations of $\{ 1, \dots , 4 \}$
($\sigma_1, \sigma_2$) with the constraint that $\sigma_1(4) \neq \sigma_2(1)$.
This ensures that each of the two networks has a non-empty block that is empty
in the other one. Then the networks are simulated with
$\mathcal{B}$ern-$BiSBM_{120}(4, \bm{\alpha}, \bm{\pi}^m, \bm{\rho}^m)$
with the previous parameters. Each network has 2 blocks in common and their
connectivity structures encompass a mix of core-periphery, assortative
community and disassortative community structures, depending on which 3 of the 4
blocks are selected for each network. $\eps[\alpha]$ represents the strength of
these structures, the larger, the easier it is to tell apart one block from
another.
\paragraph{Inference} We want to measure the quality of the
inference procedure, for this we use the inference described in the section
\ref{sec:variational-estimation-of-the-parameters}.
\paragraph{Quality indicators} To assess the quality of the inference, we will
use the following indicators:
\begin{itemize}
\item First, for each dataset, we put in competition $\pi\text{-}colBiSBM$ with
$sep\text{-}BiSBM$, $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$
respectively. To do so, for each dataset, we compute the
BIC-L of each model; $\pi\text{-}colBiSBM$ is preferred to $sep\text{-}BiSBM$
(resp. $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$) if
its BIC-L is greater.
\item When considering $\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$ we compare $\widehat{Q_1}$, $\widehat{Q_2}$ to
their true values. ($Q_1 = 4$ and $Q_2 = 4$)
\item Finally, we assess the quality of the node grouping by computing the
Adjusted Rand Index \parencite[][, ARI = 0 for a random grouping, ARI = 1 for a perfect recovery]{hubertComparingPartitions1985}. For each network, for the
$\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$ we compare the inferred block memberships to the
real ones by computing the mean of the ARI per axis over the two networks
\begin{equation*}
\overline{\text{ARI}}_d = \frac{1}{2} \big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big)
\end{equation*}
where $d$ is the dimension or axis (i.e., rows, $d=1$, or columns, $d=2$) of
the block memberships.
And we compute the ARI of the whole set of nodes to account for block
pairing between networks
\begin{equation*}
\text{ARI}_d = \text{ARI}\big((\widehat{\bm{Z}^1_d},\widehat{\bm{Z}^2_d}),(\bm{Z}^1_d,\bm{Z}^2_d) \big)
\end{equation*}
\end{itemize}
All these quality indicators are averaged over the 108 datasets. The results are
provided in the tables \ref{tab:per_model_sep} to \ref{tab:per_model_pirho}. Each line corresponds to the
108 datasets for a given value of $\eps[\alpha]$.
```{r inference_table, echo = FALSE}
# Summarise every metric per value of epsilon_alpha as "mean $\pm$ se" strings.
averaged_data <- result_data_frame %>%
  group_by(epsilon_alpha) %>%
  summarise(across(-preferred_model, list("avrg" = meanse))) %>%
  # NOTE(review): drops columns 2 to 10 by position — confirm which columns
  # these are in the saved data.
  select(-c(2:10))
# Drop the BIC-L summaries: every column whose name contains "_BICL".
# A tidyselect helper replaces the glob-style pattern "*_BICL_*", which is not
# a well-formed regular expression (leading "*").
averaged_data <- averaged_data %>%
  select(-contains("_BICL", ignore.case = FALSE))
```
```{r function_per_model, echo = FALSE}
# Extract the summary columns of a single model from `averaged_data`.
#
# model: model prefix (e.g. "sep", "iid"); the columns whose names start with
#        "<model>_" are returned together with `epsilon_alpha`.
dataframe_per_model <- function(model) {
  model_prefix <- paste0(model, "_")
  select(averaged_data, epsilon_alpha, starts_with(model_prefix))
}
```
```{r per_model_table, echo = FALSE, results='asis', message=FALSE, warning = FALSE}
# Print one LaTeX table of quality metrics per model (the chunk uses
# results='asis', so the printed kable output goes straight into the document).
for (model in c("sep", "iid", "pi", "rho", "pirho")) {
  kable_colnames <- c(
    "$\\eps[\\alpha]$", # "BIC-L",
    "$\\overline{\\text{ARI}}_{1}$",
    "$\\overline{\\text{ARI}}_{2}$", "$\\text{ARI}_{1}$", "$\\text{ARI}_{2}$"
  )
  # Every table except the "sep" one has the two "Recovered Q" columns.
  if (model != "sep") {
    kable_colnames <- c(
      kable_colnames, "Recovered $Q_1$",
      "Recovered $Q_2$"
    )
  }
  # Greek-letter models are typeset in math mode; "sep" and "iid" stay as text.
  model_name <- switch(model,
    pi = "$\\pi$",
    rho = "$\\rho$",
    pirho = "$\\pi\\rho$",
    model
  )
  # The text of the document names the separate model "sep-BiSBM", the
  # collection models "<name>-colBiSBM".
  model_suffix <- if (model == "sep") "-BiSBM" else "-colBiSBM"
  print(kable(dataframe_per_model(model),
    escape = FALSE,
    booktabs = TRUE,
    digits = 2,
    position = "!h",
    # Give each table its own label (rendered as \label{tab:per_model_<model>})
    # instead of the chunk label, which would be repeated on all five tables.
    label = paste0("per_model_", model),
    caption = paste0("Quality metrics for ", model_name, model_suffix),
    col.names = kable_colnames
  ))
}
```
```{r proportion-preferred_model, echo = FALSE}
# For each value of epsilon_alpha, the share of datasets in which each model
# is the preferred one (one column per model, 0 when a model is never chosen).
proportion_preferred_table <- local({
  model_counts <- summarise(
    group_by(result_data_frame, epsilon_alpha, preferred_model),
    n = n(),
    .groups = "drop_last"
  )
  # Still grouped by epsilon_alpha here, so sum(n) is the per-epsilon total.
  model_shares <- mutate(model_counts, freq = n / sum(n))
  model_shares <- select(ungroup(model_shares), -n)
  pivot_wider(
    model_shares,
    names_from = preferred_model,
    values_from = freq,
    values_fill = 0
  )
})
```
\paragraph{Results} For the model comparison, when $\eps[\alpha]$ is small
($\eps[\alpha]\in[0, .04]$), the simulation model is close to the
Erd\H{o}s-Rényi network and it is very hard to find any structure beyond the one
of a single block on each dimension.

View file

@ -0,0 +1,205 @@
\hypertarget{efficiency-of-the-inference}{%
\section{Efficiency of the
inference}\label{efficiency-of-the-inference}}
\paragraph{Simulation settings}
For this simulation the data is simulated with
\(M = 2, n_{1}^{m} = 120, n_{2}^{m} = 120, Q_1 = Q_2 = 4\),
\(\bm{\alpha}, \bm{\pi}\) and \(\bm{\rho}\) are set as follows:
\begin{align*}
&&\bm{\alpha} = .25 +
\begin{pmatrix}
3 \eps[\alpha] & 2 \eps[\alpha] & \eps[\alpha] & - \eps[\alpha]\\
2 \eps[\alpha] & 2 \eps[\alpha] & - \eps[\alpha] & \eps[\alpha]\\
\eps[\alpha] & - \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha]\\
- \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha] & 0
\end{pmatrix}, \\ \bm{\pi}^1 = \sigma_1
\begin{pmatrix}
0.2 & 0.4 & 0.4 & 0
\end{pmatrix},
&& \bm{\pi}^2 =
\begin{pmatrix}
0.25 & 0.25 & 0.25 & 0.25
\end{pmatrix}, \\
\bm{\rho}^1 =
\begin{pmatrix}
0.25 & 0.25 & 0.25 & 0.25
\end{pmatrix}, &&
\bm{\rho}^2 = \sigma_2
\begin{pmatrix}
0 & 0.33 & 0.33 & 0.33
\end{pmatrix}, &&
\end{align*} with \(\eps[\alpha]\) taking nine equally spaced values
ranging from 0 to 0.24. For each value of \(\eps[\alpha]\), 108 datasets
(\(X_1, X_2\)) are simulated, resulting in \(9 \times 108 = 972\)
datasets. More precisely, for each dataset, we pick uniformly at random
two permutations of \(\{ 1, \dots , 4 \}\) (\(\sigma_1, \sigma_2\)) with
the constraint that \(\sigma_1(4) \neq \sigma_2(1)\). This ensures that
each of the two networks has a non-empty block that is empty in the
other one. Then the networks are simulated with
\(\mathcal{B}\)ern-\(BiSBM_{120}(4, \bm{\alpha}, \bm{\pi}^m, \bm{\rho}^m)\)
with the previous parameters. Each network has 2 blocks in common and
their connectivity structures encompass a mix of core-periphery,
assortative community and disassortative community structures, depending
on which 3 of the 4 blocks are selected for each network.
\(\eps[\alpha]\) represents the strength of these structures, the
larger, the easier it is to tell apart one block from another.
\paragraph{Inference}
We want to measure the quality of the inference procedure, for this we
use the inference described in the section
\ref{sec:variational-estimation-of-the-parameters}.
\paragraph{Quality indicators}
To assess the quality of the inference, we will use the following
indicators:
\begin{itemize}
\item First, for each dataset, we put in competition $\pi\text{-}colBiSBM$ with
$sep\text{-}BiSBM$, $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$
respectively. To do so, for each dataset, we compute the
BIC-L of each model; $\pi\text{-}colBiSBM$ is preferred to $sep\text{-}BiSBM$
(resp. $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$) if
its BIC-L is greater.
\item When considering $\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$ we compare $\widehat{Q_1}$, $\widehat{Q_2}$ to
their true values. ($Q_1 = 4$ and $Q_2 = 4$)
\item Finally, we assess the quality of the node grouping by computing the
Adjusted Rand Index \parencite[][, ARI = 0 for a random grouping, ARI = 1 for a perfect recovery]{hubertComparingPartitions1985}. For each network, for the
$\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
$\pi\rho\text{-}colBiSBM$ we compare the inferred block memberships to the
real ones by computing the mean of the ARI per axis over the two networks
\begin{equation*}
\overline{\text{ARI}}_d = \frac{1}{2} \big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big)
\end{equation*}
where $d$ is the dimension or axis (i.e., rows, $d=1$, or columns, $d=2$) of
the block memberships.
And we compute the ARI of the whole set of nodes to account for block
pairing between networks
\begin{equation*}
\text{ARI}_d = \text{ARI}\big((\widehat{\bm{Z}^1_d},\widehat{\bm{Z}^2_d}),(\bm{Z}^1_d,\bm{Z}^2_d) \big)
\end{equation*}
\end{itemize}
All these quality indicators are averaged over the 108 datasets. The
results are provided in the tables \ref{tab:per_model_sep} to
\ref{tab:per_model_pirho}. Each line corresponds to the 108 datasets for
a given value of \(\eps[\alpha]\).
\begin{table}[!h]
\caption{\label{tab:per_model_table}\label{tab:per_model_sep}Quality metrics for sep-colBiSBM}
\centering
\begin{tabular}[t]{rllll}
\toprule
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$\\
\midrule
0.00 & 0 & 0 & 0 & 0\\
0.03 & 0 & 0 & 0 & 0\\
0.06 & 0.1 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 0.05 $\pm$ 0.01\\
0.09 & 0.71 $\pm$ 0.02 & 0.7 $\pm$ 0.01 & 0.37 $\pm$ 0.02 & 0.37 $\pm$ 0.02\\
0.12 & 0.94 $\pm$ 0.01 & 0.93 $\pm$ 0.01 & 0.5 $\pm$ 0.02 & 0.49 $\pm$ 0.02\\
\addlinespace
0.15 & 0.99 & 0.99 & 0.54 $\pm$ 0.02 & 0.49 $\pm$ 0.01\\
0.18 & 0.99 & 0.99 & 0.52 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
0.21 & 0.99 & 0.99 & 0.54 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
0.24 & 1 & 1 & 0.55 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
\bottomrule
\end{tabular}
\end{table}
\begin{table}[!h]
\caption{\label{tab:per_model_table}\label{tab:per_model_iid}Quality metrics for iid-colBiSBM}
\centering
\begin{tabular}[t]{rllllll}
\toprule
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
\midrule
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
0.03 & 0 & 0 & 0 & 0 & 1 & 1\\
0.06 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 1.4 $\pm$ 0.05 & 1.49 $\pm$ 0.05\\
0.09 & 0.72 $\pm$ 0.01 & 0.71 $\pm$ 0.01 & 0.53 $\pm$ 0.02 & 0.52 $\pm$ 0.02 & 3.4 $\pm$ 0.06 & 3.41 $\pm$ 0.06\\
0.12 & 0.94 & 0.93 & 0.75 $\pm$ 0.03 & 0.72 $\pm$ 0.03 & 4.06 $\pm$ 0.02 & 3.97 $\pm$ 0.02\\
\addlinespace
0.15 & 0.98 & 0.98 & 0.77 $\pm$ 0.03 & 0.76 $\pm$ 0.03 & 4.11 $\pm$ 0.03 & 4.11 $\pm$ 0.03\\
0.18 & 0.99 & 0.99 & 0.82 $\pm$ 0.03 & 0.82 $\pm$ 0.03 & 4.15 $\pm$ 0.04 & 4.13 $\pm$ 0.03\\
0.21 & 0.99 & 0.99 & 0.8 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.35 $\pm$ 0.06 & 4.19 $\pm$ 0.04\\
0.24 & 0.99 & 0.99 & 0.77 $\pm$ 0.03 & 0.77 $\pm$ 0.03 & 4.3 $\pm$ 0.06 & 4.43 $\pm$ 0.07\\
\bottomrule
\end{tabular}
\end{table}
\begin{table}[!h]
\caption{\label{tab:per_model_table}\label{tab:per_model_pi}Quality metrics for $\pi$-colBiSBM}
\centering
\begin{tabular}[t]{rllllll}
\toprule
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
\midrule
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1\\
0.06 & 0.07 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 1.49 $\pm$ 0.05 & 1.5 $\pm$ 0.05\\
0.09 & 0.73 $\pm$ 0.02 & 0.72 $\pm$ 0.01 & 0.56 $\pm$ 0.02 & 0.53 $\pm$ 0.02 & 3.78 $\pm$ 0.07 & 3.37 $\pm$ 0.07\\
0.12 & 0.96 & 0.93 & 0.79 $\pm$ 0.02 & 0.74 $\pm$ 0.03 & 4.46 $\pm$ 0.07 & 3.95 $\pm$ 0.02\\
\addlinespace
0.15 & 0.99 & 0.97 & 0.82 $\pm$ 0.02 & 0.76 $\pm$ 0.03 & 4.62 $\pm$ 0.08 & 4\\
0.18 & 1 & 0.98 & 0.83 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.65 $\pm$ 0.09 & 4\\
0.21 & 1 & 0.98 & 0.84 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.69 $\pm$ 0.1 & 4\\
0.24 & 1 & 0.99 & 0.86 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.74 $\pm$ 0.11 & 4.01 $\pm$ 0.01\\
\bottomrule
\end{tabular}
\end{table}
\begin{table}[!h]
\caption{\label{tab:per_model_table}\label{tab:per_model_rho}Quality metrics for $\rho$-colBiSBM}
\centering
\begin{tabular}[t]{rllllll}
\toprule
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
\midrule
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1.01 $\pm$ 0.01\\
0.06 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 1.39 $\pm$ 0.05 & 1.6 $\pm$ 0.06\\
0.09 & 0.72 $\pm$ 0.01 & 0.72 $\pm$ 0.01 & 0.53 $\pm$ 0.02 & 0.54 $\pm$ 0.02 & 3.39 $\pm$ 0.07 & 3.74 $\pm$ 0.07\\
0.12 & 0.93 & 0.95 & 0.71 $\pm$ 0.03 & 0.75 $\pm$ 0.02 & 3.95 $\pm$ 0.02 & 4.5 $\pm$ 0.07\\
\addlinespace
0.15 & 0.97 & 0.99 & 0.78 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4 & 4.49 $\pm$ 0.07\\
0.18 & 0.98 & 1 & 0.76 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4.01 $\pm$ 0.01 & 4.71 $\pm$ 0.09\\
0.21 & 0.98 & 1 & 0.76 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4.03 $\pm$ 0.02 & 4.72 $\pm$ 0.09\\
0.24 & 0.98 & 1 & 0.74 $\pm$ 0.03 & 0.8 $\pm$ 0.02 & 4.06 $\pm$ 0.02 & 4.8 $\pm$ 0.1\\
\bottomrule
\end{tabular}
\end{table}
\begin{table}[!h]
\caption{\label{tab:per_model_table}\label{tab:per_model_pirho}Quality metrics for $\pi\rho$-colBiSBM}
\centering
\begin{tabular}[t]{rllllll}
\toprule
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
\midrule
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1.01 $\pm$ 0.01\\
0.06 & 0.07 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 1.48 $\pm$ 0.05 & 1.57 $\pm$ 0.06\\
0.09 & 0.74 $\pm$ 0.01 & 0.73 $\pm$ 0.01 & 0.56 $\pm$ 0.03 & 0.55 $\pm$ 0.02 & 3.69 $\pm$ 0.06 & 3.66 $\pm$ 0.06\\
0.12 & 0.96 $\pm$ 0.01 & 0.95 $\pm$ 0.01 & 0.73 $\pm$ 0.03 & 0.73 $\pm$ 0.03 & 4.31 $\pm$ 0.05 & 4.26 $\pm$ 0.05\\
\addlinespace
0.15 & 0.99 & 0.99 & 0.79 $\pm$ 0.02 & 0.78 $\pm$ 0.03 & 4.31 $\pm$ 0.05 & 4.35 $\pm$ 0.05\\
0.18 & 1 & 1 & 0.83 $\pm$ 0.02 & 0.83 $\pm$ 0.02 & 4.31 $\pm$ 0.05 & 4.25 $\pm$ 0.04\\
0.21 & 1 & 1 & 0.77 $\pm$ 0.03 & 0.77 $\pm$ 0.03 & 4.42 $\pm$ 0.05 & 4.34 $\pm$ 0.05\\
0.24 & 1 & 1 & 0.82 $\pm$ 0.02 & 0.82 $\pm$ 0.02 & 4.25 $\pm$ 0.04 & 4.31 $\pm$ 0.05\\
\bottomrule
\end{tabular}
\end{table}
\paragraph{Results}
For the model comparison, when \(\eps[\alpha]\) is small
(\(\eps[\alpha]\in[0, .04]\)), the simulation model is close to the
Erd\H{o}s-Rényi network and it is very hard to find any structure beyond
the one of a single block on each dimension.

View file

@ -1,11 +1,18 @@
```{r libraries, echo = FALSE, include = FALSE} ```{r libraries, echo = FALSE, include = FALSE}
require("ggplot2") require("ggplot2")
require("knitr")
# require("kableExtra")
require("tidyr") require("tidyr")
require("dplyr") require("dplyr")
require("patchwork") require("patchwork")
require("latex2exp") require("latex2exp")
``` ```
```{r setup, echo = FALSE, include= FALSE}
# Default output format for knitr::kable(); the option is `knitr.table.format`.
options(knitr.table.format = "latex")
```
```{r import-data, echo = FALSE} ```{r import-data, echo = FALSE}
filenames <- list.files( filenames <- list.files(
path = "./data/", path = "./data/",
@ -113,7 +120,7 @@ Finally, when $\eps[\pi] > 0$ or $\bm{\pi}^1 \neq \bm{\pi}^2$ and
$\eps[\rho] > 0$ or $\bm{\rho}^1 \neq \bm{\rho}^2$, $\eps[\rho] > 0$ or $\bm{\rho}^1 \neq \bm{\rho}^2$,
the model is a $\pi\rho\text{-}colBiSBM$. the model is a $\pi\rho\text{-}colBiSBM$.
```{r tables, echo = FALSE} ```{r tables, echo = FALSE, results='asis'}
kable( kable(
(model_comparison_eps_pi %>% (model_comparison_eps_pi %>%
select(-one_of("n")) %>% select(-one_of("n")) %>%
@ -121,23 +128,28 @@ kable(
names_from = preferred_model, names_from = preferred_model,
values_from = prop_model, values_from = prop_model,
values_fill = 0 values_fill = 0
) %>% group_by(epsilon_pi) %>% ) %>% group_by(epsilon_pi) %>%
summarise(rec_Q1 = mean(rec_Q1), summarise(
iid = sum(iid), rec_Q1 = mean(rec_Q1),
pi = sum(pi), iid = sum(iid),
rho = sum(rho), pi = sum(pi),
pirho = sum(pirho)))[,c(1,3:6, 2)], rho = sum(rho),
pirho = sum(pirho)
))[, c(1, 3:6, 2)],
digits = 2, digits = 2,
col.names = c( col.names = c(
"$\\eps[\\pi]$", "$\\eps[\\pi]$",
"$iid\\text{-}colBiSBM$", "$iid\\text{-}colBiSBM$ ",
"$\\pi\\text{-}colBiSBM$", "$\\pi\\text{-}colBiSBM$",
"$\\rho\\text{-}colBiSBM$", "$\\rho\\text{-}colBiSBM$",
"$\\pi\\rho\\text{-}colBiSBM$", "$\\pi\\rho\\text{-}colBiSBM$",
"Recovered $Q_1$" "Recovered $Q_1$"
), align = "lcccc", ), align = "lcccc",
booktab = TRUE,
position = "!h",
escape = FALSE,
caption = "\\label{tab:pi-model-sel}Model selection for varying $\\pi$ mixture parameters" caption = "\\label{tab:pi-model-sel}Model selection for varying $\\pi$ mixture parameters"
) ) %>% kableExtra::add_header_above(c(" " = 1, "Proportions of model selection" = 4, "Blocks" = 1))
kable( kable(
(model_comparison_eps_rho %>% (model_comparison_eps_rho %>%
select(-one_of("n")) %>% select(-one_of("n")) %>%
@ -154,14 +166,17 @@ kable(
digits = 2, digits = 2,
col.names = c( col.names = c(
"$\\eps[\\rho]$", "$\\eps[\\rho]$",
"$iid\\text{-}colBiSBM$", "$iid\\text{-}colBiSBM$ ",
"$\\pi\\text{-}colBiSBM$", "$\\pi\\text{-}colBiSBM$",
"$\\rho\\text{-}colBiSBM$", "$\\rho\\text{-}colBiSBM$",
"$\\pi\\rho\\text{-}colBiSBM$", "$\\pi\\rho\\text{-}colBiSBM$",
"Recovered $Q_2$" "Recovered $Q_2$"
), align = "lcccc", ), align = "lcccc",
booktab = TRUE,
position = "!h",
escape = FALSE,
caption = "\\label{tab:rho-model-sel}Model selection for varying $\\rho$ mixture parameters" caption = "\\label{tab:rho-model-sel}Model selection for varying $\\rho$ mixture parameters"
) ) %>% kableExtra::add_header_above(c(" " = 1, "Proportions of model selection" = 4, "Blocks" = 1))
``` ```
\begin{figure}[H] \begin{figure}[H]
@ -170,7 +185,7 @@ kable(
\label{fig:pref_model_func_eps} \label{fig:pref_model_func_eps}
\end{figure} \end{figure}
On the figure \ref{fig:pref_model_func_eps} and tables \ref{tab:pi-model-sel} \paragraph{Results:}On the figure \ref{fig:pref_model_func_eps} and tables \ref{tab:pi-model-sel}
and \ref{tab:rho-model-sel}, one can see that there is a turning and \ref{tab:rho-model-sel}, one can see that there is a turning
point around $\eps[\pi] = 0.2$ (resp. $\eps[\rho] = 0.2$), before which point around $\eps[\pi] = 0.2$ (resp. $\eps[\rho] = 0.2$), before which
$iid\text{-}colBiSBM$ $iid\text{-}colBiSBM$
@ -180,5 +195,5 @@ $\rho\text{-}colBiSBM$) and
$\pi\rho\text{-}colBiSBM$ gets more and more selected, highlighting our $\pi\rho\text{-}colBiSBM$ gets more and more selected, highlighting our
capacity to recover the simulated structure. capacity to recover the simulated structure.
Please note that when "Recovered $Q_1$(or $Q_2$)" is not an integer it's because \paragraph*{Remark:} Please note that when "Recovered $Q_1$(or $Q_2$)" is not an integer it's because
some procedures returned a value other than 3. some procedures returned a value other than 3.

View file

@ -49,315 +49,53 @@ is a \(\rho\text{-}colBiSBM\). Finally, when \(\eps[\pi] > 0\) or
\(\bm{\rho}^1 \neq \bm{\rho}^2\), the model is a \(\bm{\rho}^1 \neq \bm{\rho}^2\), the model is a
\(\pi\rho\text{-}colBiSBM\). \(\pi\rho\text{-}colBiSBM\).
\begin{longtable}[]{@{}lccccl@{}} \begin{table}[!h]
\caption{\label{tab:pi-model-sel}Model selection for varying \(\pi\)
mixture parameters}\tabularnewline
\toprule
\begin{minipage}[b]{0.08\columnwidth}\raggedright
\(\eps[\pi]\)\strut
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
\(iid\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
\(\pi\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
\(\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
\(\pi\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
Recovered \(Q_1\)\strut
\end{minipage}\tabularnewline
\midrule
\endfirsthead
\toprule
\begin{minipage}[b]{0.08\columnwidth}\raggedright
\(\eps[\pi]\)\strut
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
\(iid\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
\(\pi\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
\(\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
\(\pi\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
Recovered \(Q_1\)\strut
\end{minipage}\tabularnewline
\midrule
\endhead
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.65\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.35\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.04\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.66\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.34\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.07\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.64\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.34\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.11\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.63\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.03\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.31\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.03\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.14\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.55\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.12\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.28\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.05\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.18\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.39\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.26\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.21\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.13\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.21\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.23\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.42\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.13\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.23\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.25\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.10\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.56\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.05\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.29\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.02\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.08\columnwidth}\raggedright
0.28\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.65\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.33\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\bottomrule
\end{longtable}
\begin{longtable}[]{@{}lccccl@{}} \caption{\label{tab:tables}\label{tab:pi-model-sel}Model selection for varying $\pi$ mixture parameters}
\caption{\label{tab:rho-model-sel}Model selection for varying \(\rho\) \centering
mixture parameters}\tabularnewline \begin{tabular}[t]{lccccl}
\toprule \toprule
\begin{minipage}[b]{0.09\columnwidth}\raggedright \multicolumn{1}{c}{ } & \multicolumn{4}{c}{Proportions of model selection} & \multicolumn{1}{c}{Blocks} \\
\(\eps[\rho]\)\strut \cmidrule(l{3pt}r{3pt}){2-5} \cmidrule(l{3pt}r{3pt}){6-6}
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering $\eps[\pi]$ & $iid\text{-}colBiSBM$ & $\pi\text{-}colBiSBM$ & $\rho\text{-}colBiSBM$ & $\pi\rho\text{-}colBiSBM$ & Recovered $Q_1$\\
\(iid\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
\(\pi\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
\(\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
\(\pi\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
Recovered \(Q_2\)\strut
\end{minipage}\tabularnewline
\midrule \midrule
\endfirsthead 0.00 & 0.65 & 0.00 & 0.35 & 0.00 & 3.00\\
\toprule 0.04 & 0.66 & 0.00 & 0.34 & 0.00 & 3.00\\
\begin{minipage}[b]{0.09\columnwidth}\raggedright 0.07 & 0.64 & 0.01 & 0.34 & 0.01 & 3.01\\
\(\eps[\rho]\)\strut 0.11 & 0.63 & 0.03 & 0.31 & 0.03 & 3.01\\
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering 0.14 & 0.55 & 0.12 & 0.28 & 0.05 & 3.00\\
\(iid\text{-}colBiSBM\)\strut \addlinespace
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering 0.18 & 0.39 & 0.26 & 0.21 & 0.13 & 3.01\\
\(\pi\text{-}colBiSBM\)\strut 0.21 & 0.23 & 0.42 & 0.13 & 0.23 & 3.01\\
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering 0.25 & 0.10 & 0.56 & 0.05 & 0.29 & 3.02\\
\(\rho\text{-}colBiSBM\)\strut 0.28 & 0.01 & 0.65 & 0.01 & 0.33 & 3.01\\
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
\(\pi\rho\text{-}colBiSBM\)\strut
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
Recovered \(Q_2\)\strut
\end{minipage}\tabularnewline
\midrule
\endhead
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.63\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.37\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.04\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.65\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.34\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.00\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.07\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.64\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.33\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.11\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.64\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.31\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.03\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.02\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.14\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.53\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.29\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.11\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.06\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.18\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.42\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.20\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.24\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.14\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.21\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.25\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.12\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.40\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.22\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.25\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.08\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.06\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.58\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.29\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.01\strut
\end{minipage}\tabularnewline
\begin{minipage}[t]{0.09\columnwidth}\raggedright
0.28\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
0.01\strut
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
0.65\strut
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
0.32\strut
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
3.00\strut
\end{minipage}\tabularnewline
\bottomrule \bottomrule
\end{longtable} \end{tabular}
\end{table}
\begin{table}[!h]
\caption{\label{tab:tables}\label{tab:rho-model-sel}Model selection for varying $\rho$ mixture parameters}
\centering
\begin{tabular}[t]{lccccl}
\toprule
\multicolumn{1}{c}{ } & \multicolumn{4}{c}{Proportions of model selection} & \multicolumn{1}{c}{Blocks} \\
\cmidrule(l{3pt}r{3pt}){2-5} \cmidrule(l{3pt}r{3pt}){6-6}
$\eps[\rho]$ & $iid\text{-}colBiSBM$ & $\pi\text{-}colBiSBM$ & $\rho\text{-}colBiSBM$ & $\pi\rho\text{-}colBiSBM$ & Recovered $Q_2$\\
\midrule
0.00 & 0.63 & 0.37 & 0.00 & 0.00 & 3.00\\
0.04 & 0.65 & 0.34 & 0.00 & 0.01 & 3.00\\
0.07 & 0.64 & 0.33 & 0.01 & 0.01 & 3.00\\
0.11 & 0.64 & 0.31 & 0.03 & 0.02 & 3.00\\
0.14 & 0.53 & 0.29 & 0.11 & 0.06 & 3.00\\
\addlinespace
0.18 & 0.42 & 0.20 & 0.24 & 0.14 & 3.01\\
0.21 & 0.25 & 0.12 & 0.40 & 0.22 & 3.01\\
0.25 & 0.08 & 0.06 & 0.58 & 0.29 & 3.01\\
0.28 & 0.01 & 0.01 & 0.65 & 0.32 & 3.00\\
\bottomrule
\end{tabular}
\end{table}
\begin{figure}[H] \begin{figure}[H]
\includegraphics{./Rcodes/simulation/img/plot_model_function_eps.png} \includegraphics{./Rcodes/simulation/img/plot_model_function_eps.png}
@ -365,6 +103,8 @@ Recovered \(Q_2\)\strut
\label{fig:pref_model_func_eps} \label{fig:pref_model_func_eps}
\end{figure} \end{figure}
\paragraph{Results:}
In Figure~\ref{fig:pref_model_func_eps} and Tables In Figure~\ref{fig:pref_model_func_eps} and Tables
\ref{tab:pi-model-sel} and \ref{tab:rho-model-sel}, one can see that \ref{tab:pi-model-sel} and \ref{tab:rho-model-sel}, one can see that
there is a turning point around \(\eps[\pi] = 0.2\) (resp. there is a turning point around \(\eps[\pi] = 0.2\) (resp.
@ -375,5 +115,7 @@ most of the times and after \(0.2\) the \(\pi\text{-}colBiSBM\) (resp.
more selected, highlighting our capacity to recover the simulated more selected, highlighting our capacity to recover the simulated
structure. structure.
\paragraph*{Remark:}
Please note that when ``Recovered \(Q_1\) (or \(Q_2\))'' is not an Please note that when ``Recovered \(Q_1\) (or \(Q_2\))'' is not an
integer it's because some procedures returned a value other than 3. integer it's because some procedures returned a value other than 3.

View file

@ -1,9 +1,8 @@
require("ggplot2") require("ggplot2")
require("tictoc") require("tictoc")
require("colSBM")
devtools::load_all("R/") result_clustering <- readRDS("./Rcodes/simulation/data/simulated")
result_clustering <- readRDS("simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds")
list_clustering <- lapply( list_clustering <- lapply(
seq_along(result_clustering), function(s) result_clustering[[s]]$list_of_clusterings seq_along(result_clustering), function(s) result_clustering[[s]]$list_of_clusterings
@ -13,7 +12,7 @@ list_best_partition <- lapply(
seq_along(list_clustering), function(s) { seq_along(list_clustering), function(s) {
list( list(
epsilon = result_clustering[[s]]$epsilon, epsilon = result_clustering[[s]]$epsilon,
best_partition = extract_bipartite_best_partition(list_clustering[[s]]) best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
) )
} }
) )

View file

@ -0,0 +1,19 @@
```{r libraries, echo = FALSE, include = FALSE}
require("ggplot2")
require("tidyr")
require("dplyr")
require("patchwork")
require("latex2exp")
```
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
```{r impoting-data, echo = FALSE}
filenames <- list.files(
path = "./data/",
pattern = "simulated_collection_clustering_*",
full.names = TRUE
)
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
data_list <- lapply(filenames, readRDS)
```

View file

@ -0,0 +1 @@
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}

View file

@ -1,11 +1,14 @@
#!/usr/bin/env Rscript #!/usr/bin/Rscript
require("knitr")
print(getwd())
options(knitr.table.format = "latex")
create_latex <- function(f) { create_latex <- function(f) {
knitr::knit(f, "/tmp/tmp-outputfile.md") knitr::knit(f, "/tmp/tmp-outputfile.md")
newname <- paste0(tools::file_path_sans_ext(f), ".tex") newname <- paste0(tools::file_path_sans_ext(f), ".tex")
mess <- paste("pandoc --extract-media=./img -f markdown -t latex -p -o", shQuote(newname), "/tmp/tmp-outputfile.md") mess <- paste("pandoc --extract-media=./img -f markdown -t latex -p /tmp/tmp-outputfile.md -o", shQuote(newname))
system(mess) system(mess)
} }
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
create_latex(args) create_latex(unlist(args))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 65 KiB

View file

@ -69,4 +69,6 @@ nouvelles problématiques passionnantes.
De plus j'ai beaucoup progressé dans les domaines abordés pendant mon De plus j'ai beaucoup progressé dans les domaines abordés pendant mon
stage, et cela m'a rendu confiant dans le choix de faire le stage, et cela m'a rendu confiant dans le choix de faire le
master \emph{MathSV} pour l'année scolaire 2023-2024. Ce stage a donc été master \emph{MathSV} pour l'année scolaire 2023-2024. Ce stage a donc été
déterminant et confirme l'orientation de mon parcours professionnel. déterminant et confirme l'orientation de mon parcours professionnel.
\paragraph*{Note} La suite de ce rapport a été rédigée en anglais.

Binary file not shown.

View file

@ -21,6 +21,7 @@
\usepackage{rotating} % For allowing to rotate figures \usepackage{rotating} % For allowing to rotate figures
\usepackage{svg} % To allow svg inclusions \usepackage{svg} % To allow svg inclusions
\usepackage{float} % To allow Pandoc to control figure placement \usepackage{float} % To allow Pandoc to control figure placement
% \usepackage{booktabs} % For good tables
%% Bibliography %% Bibliography
\usepackage[style=apa,citestyle=authoryear-comp]{biblatex} \usepackage[style=apa,citestyle=authoryear-comp]{biblatex}
@ -272,23 +273,25 @@ This model supposes that:
\label{fig:LBMvisu} \label{fig:LBMvisu}
\end{figure} \end{figure}
Parameters
% TODO fix parameters according to presentation
\begin{itemize} \begin{itemize}
\item $Q_1 = \{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}$ blocks in rows \item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ \emph{given} blocks in rows
\item $Q_2 = \{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}$ blocks in columns \item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}|$ \emph{given} blocks in columns
\item $\pi_{\bullet} = \mathbb{P}(i\in\bullet)$ in row and $\rho_{\bullet} = \mathbb{P}(j\in\bullet)$ in column \end{itemize}
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(i \leftrightarrow j | i \in {\color{blueind}\bullet}, j \in {\color{burntorange}\bullet})$ connectivity probability between two nodes, given their clustering Parameters
\begin{itemize}
\item $\pi_{\bullet} = \mathbb{P}(Z_i = \bullet)$ for rows and $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ for columns
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(X_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$, probability of connectivity knowing node membership blocks.
\end{itemize} \end{itemize}
On \ref{fig:LBMvisu}, $\pi$ are the probabilities for a row node to belong to In Figure~\ref{fig:LBMvisu}, $\bm{\pi}$ are the probabilities for a row node to belong to
the row block of corresponding color, $\rho$ are the probabilities for a column the row block of corresponding color, $\bm{\rho}$ are the probabilities for a column
node to belong to the column block of corresponding color and $\alpha$ are the node to belong to the column block of corresponding color and $\bm{\alpha}$ are the
connectivity parameters between the row and column blocks. connectivity parameters between the row and column blocks.
This model can be used to easily generate bipartite graphs with complex and very This model can be used to easily generate bipartite graphs with complex and very
varied structures. But when trying to determine the structure of a given network varied structures. But when trying to determine the structure of a given network
we need to find those parameters. we need to find those parameters; moreover, the row and column block memberships are
\emph{latent}, i.e.,\ they are not known and must be inferred.
For this a common approach is to use a VEM algorithm For this a common approach is to use a VEM algorithm
(proposed for SBM in~\cite{daudinMixtureModelRandom2008} and for LBM in~\cite{govaertEMAlgorithmBlock2005}) (proposed for SBM in~\cite{daudinMixtureModelRandom2008} and for LBM in~\cite{govaertEMAlgorithmBlock2005})
@ -316,7 +319,7 @@ We define a collection of bipartite networks as $\bm{X} = (X^1, \dots, X^M)$
the collection of incidence matrices. Moreover, all the networks in the collection the collection of incidence matrices. Moreover, all the networks in the collection
have the same type of interaction (e.g., all interactions are binary). have the same type of interaction (e.g., all interactions are binary).
\section{Separate BiSBM (sepBiSBM)}\label{sec:separate-bisbm-sepbisbm} \section{Separate BiSBM (sep-BiSBM)}\label{sec:separate-bisbm-sepbisbm}
A first approach to deal with a collection of networks is to adjust separate A first approach to deal with a collection of networks is to adjust separate
BiSBM for each network of the collection. BiSBM for each network of the collection.
@ -531,9 +534,6 @@ $\bm{\tau}$. \\
% \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta}) % \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta})
% \end{equation*} % \end{equation*}
% TODO Develop the formula
The VEM algorithm alternates between two steps, the variational E step and the M step. The VEM algorithm alternates between two steps, the variational E step and the M step.
The E step consists in optimizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ for a The E step consists in optimizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ for a
current value of $\bm{\theta}$ with respect to $\bm{\tau}$. And the M step current value of $\bm{\theta}$ with respect to $\bm{\tau}$. And the M step
@ -1060,8 +1060,9 @@ We illustrate our capacity to perform a partition of a collection for all
colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}. colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
\chapter{Simulation studies}\label{chap:simulation-studies} \chapter{Simulation studies}\label{chap:simulation-studies}
\include{Rcodes/simulation/inference_analyze}
\include{Rcodes/simulation/model_selection_analyze} \include{Rcodes/simulation/model_selection_analyze}
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks} \include{Rcodes/simulation/netclustering_analyze}
\chapter{Applications} \chapter{Applications}
\include{Rcodes/real_data/application_dore_data} \include{Rcodes/real_data/application_dore_data}

View file

@ -198,6 +198,25 @@
file = {/home/polarolouis/Zotero/storage/6F8YT8AD/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/7DSZ3KD9/Holland et al. - 1983 - Stochastic blockmodels First steps.pdf;/home/polarolouis/Zotero/storage/DUL2RV8Q/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/G9KZBG9W/0378873383900217.html} file = {/home/polarolouis/Zotero/storage/6F8YT8AD/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/7DSZ3KD9/Holland et al. - 1983 - Stochastic blockmodels First steps.pdf;/home/polarolouis/Zotero/storage/DUL2RV8Q/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/G9KZBG9W/0378873383900217.html}
} }
@article{hubertComparingPartitions1985,
title = {Comparing Partitions},
author = {Hubert, Lawrence and Arabie, Phipps},
date = {1985-12-01},
journaltitle = {Journal of Classification},
shortjournal = {Journal of Classification},
volume = {2},
number = {1},
pages = {193--218},
issn = {1432-1343},
doi = {10.1007/BF01908075},
url = {https://doi.org/10.1007/BF01908075},
urldate = {2023-07-04},
abstract = {The problem of comparing two different partitions of a finite set of objects reappears continually in the clustering literature. We begin by reviewing a well-known measure of partition correspondence often attributed to Rand (1971), discuss the issue of correcting this index for chance, and note that a recent normalization strategy developed by Morey and Agresti (1984) and adopted by others (e.g., Miligan and Cooper 1985) is based on an incorrect assumption. Then, the general problem of comparing partitions is approached indirectly by assessing the congruence of two proximity matrices using a simple cross-product measure. They are generated from corresponding partitions using various scoring rules. Special cases derivable include traditionally familiar statistics and/or ones tailored to weight certain object pairs differentially. Finally, we propose a measure based on the comparison of object triples having the advantage of a probabilistic interpretation in addition to being corrected for chance (i.e., assuming a constant value under a reasonable null hypothesis) and bounded between ±1.},
langid = {english},
keywords = {Consensus indices,Measures of agreement,Measures of association},
file = {/home/polarolouis/Zotero/storage/7TKW7HEM/Hubert et Arabie - 1985 - Comparing partitions.pdf}
}
@article{kaszewska-gilasGlobalStudiesHostParasite2021, @article{kaszewska-gilasGlobalStudiesHostParasite2021,
title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}}, title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}},
author = {Kaszewska-Gilas, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej}, author = {Kaszewska-Gilas, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},