Adding work BOD 7/7/23
This commit is contained in:
parent
37e5b958a2
commit
767f2f86c7
14 changed files with 545 additions and 343 deletions
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
|
|
@ -4,7 +4,7 @@
|
|||
{
|
||||
"match": ".*\\.Rmd",
|
||||
"isAsync": true,
|
||||
"cmd": "Rscript Rmd2Latex-fragment.R '${file}' "
|
||||
"cmd": "/bin/bash -c \"Rscript Rmd2Latex-fragment.R '${file}'\""
|
||||
},
|
||||
{
|
||||
"match": ".*",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,196 @@
|
|||
```{r libraries, echo = FALSE, include = FALSE}
|
||||
require("ggplot2")
|
||||
require("tidyr")
|
||||
require("dplyr")
|
||||
require("stringr")
|
||||
require("knitr")
|
||||
require("pander")
|
||||
require("patchwork")
|
||||
require("latex2exp")
|
||||
```
|
||||
|
||||
```{r setup, echo = FALSE}
|
||||
# Silence the message dplyr prints about how summarise() regroups its output.
options(dplyr.summarise.inform = FALSE)
|
||||
# Ask kable() to emit LaTeX tables.
# NOTE(review): presumably needed because this fragment is converted to LaTeX
# through pandoc rather than knitted directly to PDF -- confirm.
knitr::opts_knit$set(kable.force.latex = TRUE)
|
||||
|
||||
# Format a numeric vector as "mean $\pm$ standard error" for a LaTeX table.
#
# x:   numeric vector; missing values are ignored.
# ...: accepted so the function can be used as a summary callback; unused.
#
# Returns a single character string:
#   * "NA" when the mean or the standard error cannot be computed (no
#     observation, or only one non-missing observation);
#   * the mean alone when the standard error rounds to 0;
#   * "<mean> $\pm$ <se>" otherwise.
# Both numbers are rounded to two decimals.
meanse <- function(x, ...) {
  n_obs <- sum(!is.na(x))
  mean1 <- signif(round(mean(x, na.rm = TRUE), 2), 5)
  se1 <- signif(round(sd(x, na.rm = TRUE) / sqrt(n_obs), 2), 2)

  # Test for missingness before any comparison: `NA == 0` is NA and would
  # abort the `if` below.
  if (is.na(mean1) || is.na(se1)) {
    return("NA")
  }
  # A null standard error carries no information, so drop the plus/minus part.
  if (se1 == 0) {
    return(paste(mean1))
  }
  paste(mean1, "$\\pm$", se1)
}
|
||||
```
|
||||
|
||||
```{r import-data, echo = FALSE}
|
||||
# Collect the result files of the July 2023 inference runs. `pattern` is a
# regular expression, not a shell glob, so the glob is converted explicitly.
filenames <- list.files(
  path = "./data/",
  pattern = utils::glob2rx("inference_testing_2023-07-*"),
  full.names = TRUE
)
data_list <- lapply(filenames, readRDS)
# Positions of the BIC-L columns, one per model.
# NOTE(review): these hard-coded indices assume a fixed column layout in the
# saved results, with names ending in "_BICL" -- verify against the
# simulation script.
col_id_BICLS <- c(11, 16, 23, 30, 37)
result_data_frame <- dplyr::bind_rows(data_list)

# Compute the preferred model: for each row, the model whose BIC-L is the
# largest. vapply() guarantees a character vector; a row without any usable
# BIC-L yields NA instead of silently turning the result into a list.
result_data_frame <- cbind(
  result_data_frame,
  preferred_model = vapply(
    seq_len(nrow(result_data_frame)),
    function(n) {
      best <- names(which.max(result_data_frame[n, col_id_BICLS]))
      if (length(best) == 0L) NA_character_ else sub("_BICL", "", best)
    },
    character(1)
  )
)
|
||||
|
||||
```
|
||||
|
||||
# Efficiency of the inference
|
||||
|
||||
\paragraph{Simulation settings} For this simulation the data is simulated with
|
||||
$M = 2, n_{1}^{m} = 120, n_{2}^{m} = 120, Q_1 = Q_2 = 4$, $\bm{\alpha}, \bm{\pi}$
|
||||
and $\bm{\rho}$ are set as follows:
|
||||
\begin{align*}
|
||||
&&\bm{\alpha} = .25 +
|
||||
\begin{pmatrix}
|
||||
3 \eps[\alpha] & 2 \eps[\alpha] & \eps[\alpha] & - \eps[\alpha]\\
|
||||
2 \eps[\alpha] & 2 \eps[\alpha] & - \eps[\alpha] & \eps[\alpha]\\
|
||||
\eps[\alpha] & - \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha]\\
|
||||
- \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha] & 0
|
||||
\end{pmatrix}, \\ \bm{\pi}^1 = \sigma_1
|
||||
\begin{pmatrix}
|
||||
0.2 & 0.4 & 0.4 & 0
|
||||
\end{pmatrix},
|
||||
&& \bm{\pi}^2 =
|
||||
\begin{pmatrix}
|
||||
0.25 & 0.25 & 0.25 & 0.25
|
||||
\end{pmatrix}, \\
|
||||
\bm{\rho}^1 =
|
||||
\begin{pmatrix}
|
||||
0.25 & 0.25 & 0.25 & 0.25
|
||||
\end{pmatrix}, &&
|
||||
\bm{\rho}^2 = \sigma_2
|
||||
\begin{pmatrix}
|
||||
0 & 0.33 & 0.33 & 0.33
|
||||
\end{pmatrix}, &&
|
||||
\end{align*}
|
||||
with $\eps[\alpha]$ taking nine equally spaced values ranging from 0 to 0.24.
|
||||
For each value of $\eps[\alpha]$, 108 datasets ($X_1, X_2$) are simulated,
|
||||
resulting in $9 \times 108 = 972$ datasets. More precisely, for each dataset,
|
||||
we pick uniformly at random two permutations of $\{ 1, \dots , 4 \}$
|
||||
($\sigma_1, \sigma_2$) with the constraint that $\sigma_1(4) \neq \sigma_2(1)$.
|
||||
This ensures that each of the two networks has a non-empty block that is empty
|
||||
in the other one. Then the networks are simulated with
|
||||
$\mathcal{B}$ern-$BiSBM_{120}(4, \bm{\alpha}, \bm{\pi}^m, \bm{\rho}^m)$
|
||||
with the previous parameters. Each network has 2 blocks in common and their
|
||||
connectivity structures encompass a mix of core-periphery, assortative
|
||||
community and disassortative community structures, depending on which 3 of the 4
|
||||
blocks are selected for each network. $\eps[\alpha]$ represents the strength of
|
||||
these structures: the larger it is, the easier it is to tell apart one block from
|
||||
another.
|
||||
|
||||
\paragraph{Inference} We want to measure the quality of the
|
||||
inference procedure; for this, we use the inference described in the section
|
||||
\ref{sec:variational-estimation-of-the-parameters}.
|
||||
|
||||
\paragraph{Quality indicators} To assess the quality of the inference, we will
|
||||
use the following indicators:
|
||||
\begin{itemize}
|
||||
\item First, for each dataset, we put in competition $\pi\text{-}colBiSBM$ with
|
||||
$sep\text{-}BiSBM$, $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$
|
||||
respectively. To do so, for each dataset, we compute the
|
||||
BIC-L of each model; $\pi\text{-}colBiSBM$ is preferred to $sep\text{-}BiSBM$
|
||||
(resp. $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$) if
|
||||
its BIC-L is greater.
|
||||
\item When considering $\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$ we compare $\widehat{Q_1}$, $\widehat{Q_2}$ to
|
||||
their true values ($Q_1 = 4$ and $Q_2 = 4$).
|
||||
\item Finally, we assess the quality of the node grouping by computing the
|
||||
Adjusted Rand Index \parencite[][, ARI = 0 for a random grouping, ARI = 1 for a perfect recovery]{hubertComparingPartitions1985}. For each network, for the
|
||||
$\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$ we compare the inferred block memberships to the
|
||||
real ones by computing the mean of the ARI per axis over the two networks
|
||||
\begin{equation*}
|
||||
\overline{\text{ARI}}_d = \frac{1}{2} \big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big)
|
||||
\end{equation*}
|
||||
where $d$ is the dimension or axis (i.e., rows, $d=1$, or columns, $d=2$) of
|
||||
the block memberships.
|
||||
And we compute the ARI of the whole set of nodes to account for block
|
||||
pairing between networks
|
||||
\begin{equation*}
|
||||
\text{ARI}_d = \text{ARI}\big((\widehat{\bm{Z}^1_d},\widehat{\bm{Z}^2_d}),(\bm{Z}^1_d,\bm{Z}^2_d) \big)
|
||||
\end{equation*}
|
||||
\end{itemize}
|
||||
|
||||
All these quality indicators are averaged over the 108 datasets. The results are
|
||||
provided in the tables \ref{tab:per_model_sep} to \ref{tab:per_model_pirho}. Each line corresponds to the
|
||||
108 datasets for a given value of $\eps[\alpha]$.
|
||||
|
||||
```{r inference_table, echo = FALSE}
|
||||
# Per-epsilon_alpha summary: every column except the preferred model is
# collapsed into a "mean $\pm$ se" string by meanse(); across() names the
# results "<column>_avrg".
averaged_data <- result_data_frame %>%
  group_by(epsilon_alpha) %>%
  summarise(across(-preferred_model, list("avrg" = meanse))) %>%
  # NOTE(review): columns 2 to 10 are dropped by position; presumably they
  # hold the simulation settings -- confirm against the saved results.
  select(-c(2:10)) %>%
  # Drop the BIC-L summaries with a literal, case-sensitive match rather than
  # a glob-style pattern passed to a regex function.
  select(-contains("_BICL", ignore.case = FALSE))
|
||||
```
|
||||
|
||||
```{r function_per_model, echo = FALSE}
|
||||
# Extract from `averaged_data` the columns belonging to one model.
#
# model: model identifier used as column-name prefix (e.g. "iid", "pirho").
#
# Returns `averaged_data` restricted to `epsilon_alpha` and to the columns
# whose name starts with "<model>_".
dataframe_per_model <- function(model) {
  model_prefix <- paste0(model, "_")
  select(averaged_data, epsilon_alpha, starts_with(model_prefix))
}
|
||||
```
|
||||
|
||||
```{r per_model_table, echo = FALSE, results='asis', message=FALSE, warning = FALSE}
|
||||
# Print one LaTeX table of quality metrics per model.
for (model in c("sep", "iid", "pi", "rho", "pirho")) {
  # Header shared by every table; the BIC-L column is currently left out.
  kable_colnames <- c(
    "$\\eps[\\alpha]$", # "BIC-L",
    "$\\overline{\\text{ARI}}_{1}$",
    "$\\overline{\\text{ARI}}_{2}$", "$\\text{ARI}_{1}$", "$\\text{ARI}_{2}$"
  )
  # Every model but "sep" gets the two recovered-block-count columns.
  if (model != "sep") {
    kable_colnames <- c(
      kable_colnames, "Recovered $Q_1$",
      "Recovered $Q_2$"
    )
  }
  # Greek-letter models are typeset in math mode; "sep" and "iid" stay as is.
  model_name <- switch(model,
    pirho = "$\\pi\\rho$",
    pi = ,
    rho = paste0("$\\", model, "$"),
    model
  )
  # The separate-fit baseline is named sep-BiSBM in the text, not a colBiSBM.
  model_suffix <- if (model == "sep") "-BiSBM" else "-colBiSBM"
  print(kable(dataframe_per_model(model),
    escape = FALSE,
    booktabs = TRUE,
    digits = 2,
    position = "!h",
    # The caption carries its own label. Disable knitr's automatic chunk
    # label, which would otherwise be repeated identically in all five tables.
    label = NA,
    caption = paste0(
      "\\label{tab:per_model_", model,
      "}Quality metrics for ",
      paste0(model_name, model_suffix)
    ),
    col.names = kable_colnames
  ))
}
|
||||
```
|
||||
|
||||
|
||||
```{r proportion-preferred_model, echo = FALSE}
|
||||
# For each epsilon_alpha, the share of datasets on which each model was
# preferred, spread into one column per model (0 when never preferred).
proportion_preferred_table <- result_data_frame %>%
  group_by(epsilon_alpha, preferred_model) %>%
  # Keep the epsilon_alpha grouping so that frequencies sum to 1 per epsilon.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n), n = NULL) %>%
  ungroup() %>%
  pivot_wider(
    values_fill = 0,
    values_from = freq,
    names_from = preferred_model
  )
|
||||
```
|
||||
|
||||
\paragraph{Results} For the model comparison, when $\eps[\alpha]$ is small
|
||||
($\eps[\alpha]\in[0, .04]$), the simulation model is close to the
|
||||
Erd\H{o}s--R\'enyi network and it is very hard to find any structure beyond the one
|
||||
of a single block on each dimension.
|
||||
205
Rcodes/simulation/inference_analyze.tex
Normal file
205
Rcodes/simulation/inference_analyze.tex
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
\hypertarget{efficiency-of-the-inference}{%
|
||||
\section{Efficiency of the
|
||||
inference}\label{efficiency-of-the-inference}}
|
||||
|
||||
\paragraph{Simulation settings}
|
||||
|
||||
For this simulation the data is simulated with
|
||||
\(M = 2, n_{1}^{m} = 120, n_{2}^{m} = 120, Q_1 = Q_2 = 4\),
|
||||
\(\bm{\alpha}, \bm{\pi}\) and \(\bm{\rho}\) are set as follows:
|
||||
\begin{align*}
|
||||
&&\bm{\alpha} = .25 +
|
||||
\begin{pmatrix}
|
||||
3 \eps[\alpha] & 2 \eps[\alpha] & \eps[\alpha] & - \eps[\alpha]\\
|
||||
2 \eps[\alpha] & 2 \eps[\alpha] & - \eps[\alpha] & \eps[\alpha]\\
|
||||
\eps[\alpha] & - \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha]\\
|
||||
- \eps[\alpha] & \eps[\alpha] & 2 \eps[\alpha] & 0
|
||||
\end{pmatrix}, \\ \bm{\pi}^1 = \sigma_1
|
||||
\begin{pmatrix}
|
||||
0.2 & 0.4 & 0.4 & 0
|
||||
\end{pmatrix},
|
||||
&& \bm{\pi}^2 =
|
||||
\begin{pmatrix}
|
||||
0.25 & 0.25 & 0.25 & 0.25
|
||||
\end{pmatrix}, \\
|
||||
\bm{\rho}^1 =
|
||||
\begin{pmatrix}
|
||||
0.25 & 0.25 & 0.25 & 0.25
|
||||
\end{pmatrix}, &&
|
||||
\bm{\rho}^2 = \sigma_2
|
||||
\begin{pmatrix}
|
||||
0 & 0.33 & 0.33 & 0.33
|
||||
\end{pmatrix}, &&
|
||||
\end{align*} with \(\eps[\alpha]\) taking nine equally spaced values
|
||||
ranging from 0 to 0.24. For each value of \(\eps[\alpha]\), 108 datasets
|
||||
(\(X_1, X_2\)) are simulated, resulting in \(9 \times 108 = 972\)
|
||||
datasets. More precisely, for each dataset, we pick uniformly at random
|
||||
two permutations of \(\{ 1, \dots , 4 \}\) (\(\sigma_1, \sigma_2\)) with
|
||||
the constraint that \(\sigma_1(4) \neq \sigma_2(1)\). This ensures that
|
||||
each of the two networks has a non-empty block that is empty in the
|
||||
other one. Then the networks are simulated with
|
||||
\(\mathcal{B}\)ern-\(BiSBM_{120}(4, \bm{\alpha}, \bm{\pi}^m, \bm{\rho}^m)\)
|
||||
with the previous parameters. Each network has 2 blocks in common and
|
||||
their connectivity structures encompass a mix of core-periphery,
|
||||
assortative community and disassortative community structures, depending
|
||||
on which 3 of the 4 blocks are selected for each network.
|
||||
\(\eps[\alpha]\) represents the strength of these structures, the
|
||||
larger, the easier it is to tell apart one block from another.
|
||||
|
||||
\paragraph{Inference}
|
||||
|
||||
We want to measure the quality of the inference procedure; for this, we
|
||||
use the inference described in the section
|
||||
\ref{sec:variational-estimation-of-the-parameters}.
|
||||
|
||||
\paragraph{Quality indicators}
|
||||
|
||||
To assess the quality of the inference, we will use the following
|
||||
indicators:
|
||||
|
||||
\begin{itemize}
|
||||
\item First, for each dataset, we put in competition $\pi\text{-}colBiSBM$ with
|
||||
$sep\text{-}BiSBM$, $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$
|
||||
respectively. To do so, for each dataset, we compute the
|
||||
BIC-L of each model; $\pi\text{-}colBiSBM$ is preferred to $sep\text{-}BiSBM$
|
||||
(resp. $iid\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$) if
|
||||
its BIC-L is greater.
|
||||
\item When considering $\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$ we compare $\widehat{Q_1}$, $\widehat{Q_2}$ to
|
||||
their true values ($Q_1 = 4$ and $Q_2 = 4$).
|
||||
\item Finally, we assess the quality of the node grouping by computing the
|
||||
Adjusted Rand Index \parencite[][, ARI = 0 for a random grouping, ARI = 1 for a perfect recovery]{hubertComparingPartitions1985}. For each network, for the
|
||||
$\pi\text{-}colBiSBM$, $\rho\text{-}colBiSBM$,
|
||||
$\pi\rho\text{-}colBiSBM$ we compare the inferred block memberships to the
|
||||
real ones by computing the mean of the ARI per axis over the two networks
|
||||
\begin{equation*}
|
||||
\overline{\text{ARI}}_d = \frac{1}{2} \big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big)
|
||||
\end{equation*}
|
||||
where $d$ is the dimension or axis (i.e., rows, $d=1$, or columns, $d=2$) of
|
||||
the block memberships.
|
||||
And we compute the ARI of the whole set of nodes to account for block
|
||||
pairing between networks
|
||||
\begin{equation*}
|
||||
\text{ARI}_d = \text{ARI}\big((\widehat{\bm{Z}^1_d},\widehat{\bm{Z}^2_d}),(\bm{Z}^1_d,\bm{Z}^2_d) \big)
|
||||
\end{equation*}
|
||||
\end{itemize}
|
||||
|
||||
All these quality indicators are averaged over the 108 datasets. The
|
||||
results are provided in the tables \ref{tab:per_model_sep} to
|
||||
\ref{tab:per_model_pirho}. Each line corresponds to the 108 datasets for
|
||||
a given value of \(\eps[\alpha]\).
|
||||
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:per_model_sep}Quality metrics for sep-BiSBM}
|
||||
\centering
|
||||
\begin{tabular}[t]{rllll}
|
||||
\toprule
|
||||
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$\\
|
||||
\midrule
|
||||
0.00 & 0 & 0 & 0 & 0\\
|
||||
0.03 & 0 & 0 & 0 & 0\\
|
||||
0.06 & 0.1 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 0.05 $\pm$ 0.01\\
|
||||
0.09 & 0.71 $\pm$ 0.02 & 0.7 $\pm$ 0.01 & 0.37 $\pm$ 0.02 & 0.37 $\pm$ 0.02\\
|
||||
0.12 & 0.94 $\pm$ 0.01 & 0.93 $\pm$ 0.01 & 0.5 $\pm$ 0.02 & 0.49 $\pm$ 0.02\\
|
||||
\addlinespace
|
||||
0.15 & 0.99 & 0.99 & 0.54 $\pm$ 0.02 & 0.49 $\pm$ 0.01\\
|
||||
0.18 & 0.99 & 0.99 & 0.52 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
|
||||
0.21 & 0.99 & 0.99 & 0.54 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
|
||||
0.24 & 1 & 1 & 0.55 $\pm$ 0.02 & 0.52 $\pm$ 0.02\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:per_model_iid}Quality metrics for iid-colBiSBM}
|
||||
\centering
|
||||
\begin{tabular}[t]{rllllll}
|
||||
\toprule
|
||||
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
|
||||
\midrule
|
||||
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
|
||||
0.03 & 0 & 0 & 0 & 0 & 1 & 1\\
|
||||
0.06 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 1.4 $\pm$ 0.05 & 1.49 $\pm$ 0.05\\
|
||||
0.09 & 0.72 $\pm$ 0.01 & 0.71 $\pm$ 0.01 & 0.53 $\pm$ 0.02 & 0.52 $\pm$ 0.02 & 3.4 $\pm$ 0.06 & 3.41 $\pm$ 0.06\\
|
||||
0.12 & 0.94 & 0.93 & 0.75 $\pm$ 0.03 & 0.72 $\pm$ 0.03 & 4.06 $\pm$ 0.02 & 3.97 $\pm$ 0.02\\
|
||||
\addlinespace
|
||||
0.15 & 0.98 & 0.98 & 0.77 $\pm$ 0.03 & 0.76 $\pm$ 0.03 & 4.11 $\pm$ 0.03 & 4.11 $\pm$ 0.03\\
|
||||
0.18 & 0.99 & 0.99 & 0.82 $\pm$ 0.03 & 0.82 $\pm$ 0.03 & 4.15 $\pm$ 0.04 & 4.13 $\pm$ 0.03\\
|
||||
0.21 & 0.99 & 0.99 & 0.8 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.35 $\pm$ 0.06 & 4.19 $\pm$ 0.04\\
|
||||
0.24 & 0.99 & 0.99 & 0.77 $\pm$ 0.03 & 0.77 $\pm$ 0.03 & 4.3 $\pm$ 0.06 & 4.43 $\pm$ 0.07\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:per_model_pi}Quality metrics for $\pi$-colBiSBM}
|
||||
\centering
|
||||
\begin{tabular}[t]{rllllll}
|
||||
\toprule
|
||||
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
|
||||
\midrule
|
||||
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
|
||||
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1\\
|
||||
0.06 & 0.07 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 1.49 $\pm$ 0.05 & 1.5 $\pm$ 0.05\\
|
||||
0.09 & 0.73 $\pm$ 0.02 & 0.72 $\pm$ 0.01 & 0.56 $\pm$ 0.02 & 0.53 $\pm$ 0.02 & 3.78 $\pm$ 0.07 & 3.37 $\pm$ 0.07\\
|
||||
0.12 & 0.96 & 0.93 & 0.79 $\pm$ 0.02 & 0.74 $\pm$ 0.03 & 4.46 $\pm$ 0.07 & 3.95 $\pm$ 0.02\\
|
||||
\addlinespace
|
||||
0.15 & 0.99 & 0.97 & 0.82 $\pm$ 0.02 & 0.76 $\pm$ 0.03 & 4.62 $\pm$ 0.08 & 4\\
|
||||
0.18 & 1 & 0.98 & 0.83 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.65 $\pm$ 0.09 & 4\\
|
||||
0.21 & 1 & 0.98 & 0.84 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.69 $\pm$ 0.1 & 4\\
|
||||
0.24 & 1 & 0.99 & 0.86 $\pm$ 0.02 & 0.79 $\pm$ 0.03 & 4.74 $\pm$ 0.11 & 4.01 $\pm$ 0.01\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:per_model_rho}Quality metrics for $\rho$-colBiSBM}
|
||||
\centering
|
||||
\begin{tabular}[t]{rllllll}
|
||||
\toprule
|
||||
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
|
||||
\midrule
|
||||
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
|
||||
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1.01 $\pm$ 0.01\\
|
||||
0.06 & 0.08 $\pm$ 0.01 & 0.08 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 1.39 $\pm$ 0.05 & 1.6 $\pm$ 0.06\\
|
||||
0.09 & 0.72 $\pm$ 0.01 & 0.72 $\pm$ 0.01 & 0.53 $\pm$ 0.02 & 0.54 $\pm$ 0.02 & 3.39 $\pm$ 0.07 & 3.74 $\pm$ 0.07\\
|
||||
0.12 & 0.93 & 0.95 & 0.71 $\pm$ 0.03 & 0.75 $\pm$ 0.02 & 3.95 $\pm$ 0.02 & 4.5 $\pm$ 0.07\\
|
||||
\addlinespace
|
||||
0.15 & 0.97 & 0.99 & 0.78 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4 & 4.49 $\pm$ 0.07\\
|
||||
0.18 & 0.98 & 1 & 0.76 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4.01 $\pm$ 0.01 & 4.71 $\pm$ 0.09\\
|
||||
0.21 & 0.98 & 1 & 0.76 $\pm$ 0.03 & 0.81 $\pm$ 0.02 & 4.03 $\pm$ 0.02 & 4.72 $\pm$ 0.09\\
|
||||
0.24 & 0.98 & 1 & 0.74 $\pm$ 0.03 & 0.8 $\pm$ 0.02 & 4.06 $\pm$ 0.02 & 4.8 $\pm$ 0.1\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:per_model_pirho}Quality metrics for $\pi\rho$-colBiSBM}
|
||||
\centering
|
||||
\begin{tabular}[t]{rllllll}
|
||||
\toprule
|
||||
$\eps[\alpha]$ & $\overline{\text{ARI}}_{1}$ & $\overline{\text{ARI}}_{2}$ & $\text{ARI}_{1}$ & $\text{ARI}_{2}$ & Recovered $Q_1$ & Recovered $Q_2$\\
|
||||
\midrule
|
||||
0.00 & 0 & 0 & 0 & 0 & 1 & 1\\
|
||||
0.03 & 0 & 0 & 0 & 0 & 1.01 $\pm$ 0.01 & 1.01 $\pm$ 0.01\\
|
||||
0.06 & 0.07 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.07 $\pm$ 0.01 & 0.06 $\pm$ 0.01 & 1.48 $\pm$ 0.05 & 1.57 $\pm$ 0.06\\
|
||||
0.09 & 0.74 $\pm$ 0.01 & 0.73 $\pm$ 0.01 & 0.56 $\pm$ 0.03 & 0.55 $\pm$ 0.02 & 3.69 $\pm$ 0.06 & 3.66 $\pm$ 0.06\\
|
||||
0.12 & 0.96 $\pm$ 0.01 & 0.95 $\pm$ 0.01 & 0.73 $\pm$ 0.03 & 0.73 $\pm$ 0.03 & 4.31 $\pm$ 0.05 & 4.26 $\pm$ 0.05\\
|
||||
\addlinespace
|
||||
0.15 & 0.99 & 0.99 & 0.79 $\pm$ 0.02 & 0.78 $\pm$ 0.03 & 4.31 $\pm$ 0.05 & 4.35 $\pm$ 0.05\\
|
||||
0.18 & 1 & 1 & 0.83 $\pm$ 0.02 & 0.83 $\pm$ 0.02 & 4.31 $\pm$ 0.05 & 4.25 $\pm$ 0.04\\
|
||||
0.21 & 1 & 1 & 0.77 $\pm$ 0.03 & 0.77 $\pm$ 0.03 & 4.42 $\pm$ 0.05 & 4.34 $\pm$ 0.05\\
|
||||
0.24 & 1 & 1 & 0.82 $\pm$ 0.02 & 0.82 $\pm$ 0.02 & 4.25 $\pm$ 0.04 & 4.31 $\pm$ 0.05\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\paragraph{Results}
|
||||
|
||||
For the model comparison, when \(\eps[\alpha]\) is small
|
||||
(\(\eps[\alpha]\in[0, .04]\)), the simulation model is close to the
|
||||
Erd\H{o}s--R\'enyi network and it is very hard to find any structure beyond
|
||||
the one of a single block on each dimension.
|
||||
|
|
@ -1,11 +1,18 @@
|
|||
```{r libraries, echo = FALSE, include = FALSE}
|
||||
require("ggplot2")
|
||||
require("knitr")
|
||||
# require("kableExtra")
|
||||
require("tidyr")
|
||||
require("dplyr")
|
||||
require("patchwork")
|
||||
require("latex2exp")
|
||||
```
|
||||
|
||||
```{r setup, echo = FALSE, include= FALSE}
|
||||
# Make kable() produce LaTeX tables by default. The option read by knitr is
# `knitr.table.format`; the previous, duplicated name was silently ignored.
options(knitr.table.format = "latex")
|
||||
```
|
||||
|
||||
|
||||
```{r import-data, echo = FALSE}
|
||||
filenames <- list.files(
|
||||
path = "./data/",
|
||||
|
|
@ -113,7 +120,7 @@ Finally, when $\eps[\pi] > 0$ or $\bm{\pi}^1 \neq \bm{\pi}^2$ and
|
|||
$\eps[\rho] > 0$ or $\bm{\rho}^1 \neq \bm{\rho}^2$,
|
||||
the model is a $\pi\rho\text{-}colBiSBM$.
|
||||
|
||||
```{r tables, echo = FALSE}
|
||||
```{r tables, echo = FALSE, results='asis'}
|
||||
kable(
|
||||
(model_comparison_eps_pi %>%
|
||||
select(-one_of("n")) %>%
|
||||
|
|
@ -122,22 +129,27 @@ kable(
|
|||
values_from = prop_model,
|
||||
values_fill = 0
|
||||
) %>% group_by(epsilon_pi) %>%
|
||||
summarise(rec_Q1 = mean(rec_Q1),
|
||||
summarise(
|
||||
rec_Q1 = mean(rec_Q1),
|
||||
iid = sum(iid),
|
||||
pi = sum(pi),
|
||||
rho = sum(rho),
|
||||
pirho = sum(pirho)))[,c(1,3:6, 2)],
|
||||
pirho = sum(pirho)
|
||||
))[, c(1, 3:6, 2)],
|
||||
digits = 2,
|
||||
col.names = c(
|
||||
"$\\eps[\\pi]$",
|
||||
"$iid\\text{-}colBiSBM$",
|
||||
"$iid\\text{-}colBiSBM$ ",
|
||||
"$\\pi\\text{-}colBiSBM$",
|
||||
"$\\rho\\text{-}colBiSBM$",
|
||||
"$\\pi\\rho\\text{-}colBiSBM$",
|
||||
"Recovered $Q_1$"
|
||||
), align = "lcccc",
|
||||
booktab = TRUE,
|
||||
position = "!h",
|
||||
escape = FALSE,
|
||||
caption = "\\label{tab:pi-model-sel}Model selection for varying $\\pi$ mixture parameters"
|
||||
)
|
||||
) %>% kableExtra::add_header_above(c(" " = 1, "Proportions of model selection" = 4, "Blocks" = 1))
|
||||
kable(
|
||||
(model_comparison_eps_rho %>%
|
||||
select(-one_of("n")) %>%
|
||||
|
|
@ -154,14 +166,17 @@ kable(
|
|||
digits = 2,
|
||||
col.names = c(
|
||||
"$\\eps[\\rho]$",
|
||||
"$iid\\text{-}colBiSBM$",
|
||||
"$iid\\text{-}colBiSBM$ ",
|
||||
"$\\pi\\text{-}colBiSBM$",
|
||||
"$\\rho\\text{-}colBiSBM$",
|
||||
"$\\pi\\rho\\text{-}colBiSBM$",
|
||||
"Recovered $Q_2$"
|
||||
), align = "lcccc",
|
||||
booktab = TRUE,
|
||||
position = "!h",
|
||||
escape = FALSE,
|
||||
caption = "\\label{tab:rho-model-sel}Model selection for varying $\\rho$ mixture parameters"
|
||||
)
|
||||
) %>% kableExtra::add_header_above(c(" " = 1, "Proportions of model selection" = 4, "Blocks" = 1))
|
||||
```
|
||||
|
||||
\begin{figure}[H]
|
||||
|
|
@ -170,7 +185,7 @@ kable(
|
|||
\label{fig:pref_model_func_eps}
|
||||
\end{figure}
|
||||
|
||||
On the figure \ref{fig:pref_model_func_eps} and tables \ref{tab:pi-model-sel}
|
||||
\paragraph{Results:}On the figure \ref{fig:pref_model_func_eps} and tables \ref{tab:pi-model-sel}
|
||||
and \ref{tab:rho-model-sel}, one can see that there is a turning
|
||||
point around $\eps[\pi] = 0.2$ (resp. $\eps[\rho] = 0.2$), before which
|
||||
$iid\text{-}colBiSBM$
|
||||
|
|
@ -180,5 +195,5 @@ $\rho\text{-}colBiSBM$) and
|
|||
$\pi\rho\text{-}colBiSBM$ gets more and more selected, highlighting our
|
||||
capacity to recover the simulated structure.
|
||||
|
||||
Please note that when "Recovered $Q_1$(or $Q_2$)" is not an integer it's because
|
||||
\paragraph*{Remark:} Please note that when "Recovered $Q_1$(or $Q_2$)" is not an integer it's because
|
||||
some procedures returned a value other than 3.
|
||||
|
|
@ -49,315 +49,53 @@ is a \(\rho\text{-}colBiSBM\). Finally, when \(\eps[\pi] > 0\) or
|
|||
\(\bm{\rho}^1 \neq \bm{\rho}^2\), the model is a
|
||||
\(\pi\rho\text{-}colBiSBM\).
|
||||
|
||||
\begin{longtable}[]{@{}lccccl@{}}
|
||||
\caption{\label{tab:pi-model-sel}Model selection for varying \(\pi\)
|
||||
mixture parameters}\tabularnewline
|
||||
\toprule
|
||||
\begin{minipage}[b]{0.08\columnwidth}\raggedright
|
||||
\(\eps[\pi]\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(iid\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(\pi\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
|
||||
\(\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
|
||||
\(\pi\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
|
||||
Recovered \(Q_1\)\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\midrule
|
||||
\endfirsthead
|
||||
\toprule
|
||||
\begin{minipage}[b]{0.08\columnwidth}\raggedright
|
||||
\(\eps[\pi]\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(iid\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(\pi\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
|
||||
\(\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
|
||||
\(\pi\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
|
||||
Recovered \(Q_1\)\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\midrule
|
||||
\endhead
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.65\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.35\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.04\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.66\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.34\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.07\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.64\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.34\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.11\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.63\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.03\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.31\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.03\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.14\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.55\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.12\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.28\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.05\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.18\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.39\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.26\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.21\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.13\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.21\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.23\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.42\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.13\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.23\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.25\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.10\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.56\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.05\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.29\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.02\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.08\columnwidth}\raggedright
|
||||
0.28\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.65\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.33\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\bottomrule
|
||||
\end{longtable}
|
||||
\begin{table}[!h]
|
||||
|
||||
\begin{longtable}[]{@{}lccccl@{}}
|
||||
\caption{\label{tab:rho-model-sel}Model selection for varying \(\rho\)
|
||||
mixture parameters}\tabularnewline
|
||||
\caption{\label{tab:tables}\label{tab:pi-model-sel}Model selection for varying $\pi$ mixture parameters}
|
||||
\centering
|
||||
\begin{tabular}[t]{lccccl}
|
||||
\toprule
|
||||
\begin{minipage}[b]{0.09\columnwidth}\raggedright
|
||||
\(\eps[\rho]\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(iid\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(\pi\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
|
||||
\(\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
|
||||
\(\pi\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
|
||||
Recovered \(Q_2\)\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\multicolumn{1}{c}{ } & \multicolumn{4}{c}{Proportions of model selection} & \multicolumn{1}{c}{Blocks} \\
|
||||
\cmidrule(l{3pt}r{3pt}){2-5} \cmidrule(l{3pt}r{3pt}){6-6}
|
||||
$\eps[\pi]$ & $iid\text{-}colBiSBM$ & $\pi\text{-}colBiSBM$ & $\rho\text{-}colBiSBM$ & $\pi\rho\text{-}colBiSBM$ & Recovered $Q_1$\\
|
||||
\midrule
|
||||
\endfirsthead
|
||||
\toprule
|
||||
\begin{minipage}[b]{0.09\columnwidth}\raggedright
|
||||
\(\eps[\rho]\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(iid\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.15\columnwidth}\centering
|
||||
\(\pi\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\centering
|
||||
\(\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.18\columnwidth}\centering
|
||||
\(\pi\rho\text{-}colBiSBM\)\strut
|
||||
\end{minipage} & \begin{minipage}[b]{0.11\columnwidth}\raggedright
|
||||
Recovered \(Q_2\)\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\midrule
|
||||
\endhead
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.63\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.37\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.04\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.65\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.34\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.00\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.07\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.64\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.33\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.11\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.64\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.31\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.03\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.02\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.14\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.53\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.29\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.11\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.06\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.18\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.42\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.20\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.24\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.14\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.21\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.25\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.12\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.40\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.22\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.25\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.08\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.06\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.58\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.29\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.01\strut
|
||||
\end{minipage}\tabularnewline
|
||||
\begin{minipage}[t]{0.09\columnwidth}\raggedright
|
||||
0.28\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.15\columnwidth}\centering
|
||||
0.01\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\centering
|
||||
0.65\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.18\columnwidth}\centering
|
||||
0.32\strut
|
||||
\end{minipage} & \begin{minipage}[t]{0.11\columnwidth}\raggedright
|
||||
3.00\strut
|
||||
\end{minipage}\tabularnewline
|
||||
0.00 & 0.65 & 0.00 & 0.35 & 0.00 & 3.00\\
|
||||
0.04 & 0.66 & 0.00 & 0.34 & 0.00 & 3.00\\
|
||||
0.07 & 0.64 & 0.01 & 0.34 & 0.01 & 3.01\\
|
||||
0.11 & 0.63 & 0.03 & 0.31 & 0.03 & 3.01\\
|
||||
0.14 & 0.55 & 0.12 & 0.28 & 0.05 & 3.00\\
|
||||
\addlinespace
|
||||
0.18 & 0.39 & 0.26 & 0.21 & 0.13 & 3.01\\
|
||||
0.21 & 0.23 & 0.42 & 0.13 & 0.23 & 3.01\\
|
||||
0.25 & 0.10 & 0.56 & 0.05 & 0.29 & 3.02\\
|
||||
0.28 & 0.01 & 0.65 & 0.01 & 0.33 & 3.01\\
|
||||
\bottomrule
|
||||
\end{longtable}
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\begin{table}[!h]
|
||||
|
||||
\caption{\label{tab:tables}\label{tab:rho-model-sel}Model selection for varying $\rho$ mixture parameters}
|
||||
\centering
|
||||
\begin{tabular}[t]{lccccl}
|
||||
\toprule
|
||||
\multicolumn{1}{c}{ } & \multicolumn{4}{c}{Proportions of model selection} & \multicolumn{1}{c}{Blocks} \\
|
||||
\cmidrule(l{3pt}r{3pt}){2-5} \cmidrule(l{3pt}r{3pt}){6-6}
|
||||
$\eps[\rho]$ & $iid\text{-}colBiSBM$ & $\pi\text{-}colBiSBM$ & $\rho\text{-}colBiSBM$ & $\pi\rho\text{-}colBiSBM$ & Recovered $Q_2$\\
|
||||
\midrule
|
||||
0.00 & 0.63 & 0.37 & 0.00 & 0.00 & 3.00\\
|
||||
0.04 & 0.65 & 0.34 & 0.00 & 0.01 & 3.00\\
|
||||
0.07 & 0.64 & 0.33 & 0.01 & 0.01 & 3.00\\
|
||||
0.11 & 0.64 & 0.31 & 0.03 & 0.02 & 3.00\\
|
||||
0.14 & 0.53 & 0.29 & 0.11 & 0.06 & 3.00\\
|
||||
\addlinespace
|
||||
0.18 & 0.42 & 0.20 & 0.24 & 0.14 & 3.01\\
|
||||
0.21 & 0.25 & 0.12 & 0.40 & 0.22 & 3.01\\
|
||||
0.25 & 0.08 & 0.06 & 0.58 & 0.29 & 3.01\\
|
||||
0.28 & 0.01 & 0.01 & 0.65 & 0.32 & 3.00\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\begin{figure}[H]
|
||||
\includegraphics{./Rcodes/simulation/img/plot_model_function_eps.png}
|
||||
|
|
@ -365,6 +103,8 @@ Recovered \(Q_2\)\strut
|
|||
\label{fig:pref_model_func_eps}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Results:}
|
||||
|
||||
In Figure \ref{fig:pref_model_func_eps} and Tables
|
||||
\ref{tab:pi-model-sel} and \ref{tab:rho-model-sel}, one can see that
|
||||
there is a turning point around \(\eps[\pi] = 0.2\) (resp.
|
||||
|
|
@ -375,5 +115,7 @@ most of the times and after \(0.2\) the \(\pi\text{-}colBiSBM\) (resp.
|
|||
more selected, highlighting our capacity to recover the simulated
|
||||
structure.
|
||||
|
||||
\paragraph*{Remark:}
|
||||
|
||||
Please note that when ``Recovered \(Q_1\) (or \(Q_2\))'' is not an
|
||||
integer it's because some procedures returned a value other than 3.
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("colSBM")
|
||||
|
||||
devtools::load_all("R/")
|
||||
|
||||
result_clustering <- readRDS("simulation/data/simulated_collection_clustering_rho_10-05-23-14:40:46.Rds")
|
||||
result_clustering <- readRDS("./Rcodes/simulation/data/simulated")
|
||||
|
||||
list_clustering <- lapply(
|
||||
seq_along(result_clustering), function(s) result_clustering[[s]]$list_of_clusterings
|
||||
|
|
@ -13,7 +12,7 @@ list_best_partition <- lapply(
|
|||
seq_along(list_clustering), function(s) {
|
||||
list(
|
||||
epsilon = result_clustering[[s]]$epsilon,
|
||||
best_partition = extract_bipartite_best_partition(list_clustering[[s]])
|
||||
best_partition = unlist(extract_best_bipartite_partition(list_clustering[[s]]))
|
||||
)
|
||||
}
|
||||
)
|
||||
|
|
|
|||
19
Rcodes/simulation/netclustering_analyze.Rmd
Normal file
19
Rcodes/simulation/netclustering_analyze.Rmd
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
```{r libraries, echo = FALSE, include = FALSE}
|
||||
require("ggplot2")
|
||||
require("tidyr")
|
||||
require("dplyr")
|
||||
require("patchwork")
|
||||
require("latex2exp")
|
||||
```
|
||||
|
||||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||
|
||||
```{r impoting-data, echo = FALSE}
|
||||
filenames <- list.files(
|
||||
path = "./data/",
|
||||
pattern = "simulated_collection_clustering_*",
|
||||
full.names = TRUE
|
||||
)
|
||||
# data_list <- lapply(filenames, function(file) lapply(readRDS(file), function(model) model$list_clustering))
|
||||
data_list <- lapply(filenames, readRDS)
|
||||
```
|
||||
1
Rcodes/simulation/netclustering_analyze.tex
Normal file
1
Rcodes/simulation/netclustering_analyze.tex
Normal file
|
|
@ -0,0 +1 @@
|
|||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||
|
|
@ -1,11 +1,14 @@
|
|||
#!/usr/bin/env Rscript
|
||||
require("knitr")
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
print(getwd())
|
||||
|
||||
options(knitr.table.format = "latex")
|
||||
create_latex <- function(f) {
|
||||
knitr::knit(f, "/tmp/tmp-outputfile.md")
|
||||
newname <- paste0(tools::file_path_sans_ext(f), ".tex")
|
||||
mess <- paste("pandoc --extract-media=./img -f markdown -t latex -p -o", shQuote(newname), "/tmp/tmp-outputfile.md")
|
||||
mess <- paste("pandoc --extract-media=./img -f markdown -t latex -p /tmp/tmp-outputfile.md -o", shQuote(newname))
|
||||
system(mess)
|
||||
}
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
create_latex(args)
|
||||
create_latex(unlist(args))
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 65 KiB |
|
|
@ -70,3 +70,5 @@ De plus j'ai beaucoup progressé dans les domaines abordés pendant mon
|
|||
stage, et cela m'a rendu confiant dans le choix de faire le
|
||||
master \emph{MathSV} pour l'année scolaire 2023-2024. Ce stage a donc été
|
||||
déterminant et confirme l'orientation de mon parcours professionnel.
|
||||
|
||||
\paragraph*{Note} La suite de ce rapport a été rédigée en anglais.
|
||||
BIN
rapport.pdf
BIN
rapport.pdf
Binary file not shown.
31
rapport.tex
31
rapport.tex
|
|
@ -21,6 +21,7 @@
|
|||
\usepackage{rotating} % For allowing to rotate figures
|
||||
\usepackage{svg} % To allow svg inclusions
|
||||
\usepackage{float} % To allow Pandoc to control figure placement
|
||||
% \usepackage{booktabs} % For good tables
|
||||
|
||||
%% Bibliography
|
||||
\usepackage[style=apa,citestyle=authoryear-comp]{biblatex}
|
||||
|
|
@ -272,23 +273,25 @@ This model supposes that:
|
|||
\label{fig:LBMvisu}
|
||||
\end{figure}
|
||||
|
||||
Parameters
|
||||
% TODO fix parameters according to presentation
|
||||
\begin{itemize}
|
||||
\item $Q_1 = \{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}$ blocks in rows
|
||||
\item $Q_2 = \{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}$ blocks in columns
|
||||
\item $\pi_{\bullet} = \mathbb{P}(i\in\bullet)$ in row and $\rho_{\bullet} = \mathbb{P}(j\in\bullet)$ in column
|
||||
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(i \leftrightarrow j | i \in {\color{blueind}\bullet}, j \in {\color{burntorange}\bullet})$ connectivity probability between two nodes, given their clustering
|
||||
\item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ \emph{given} blocks in rows
|
||||
\item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}|$ \emph{given} blocks in columns
|
||||
\end{itemize}
|
||||
Parameters
|
||||
\begin{itemize}
|
||||
\item $\pi_{\bullet} = \mathbb{P}(Z_i = \bullet)$ for rows and $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ for columns
|
||||
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(X_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$, probability of connectivity knowing node membership blocks.
|
||||
\end{itemize}
|
||||
|
||||
On \ref{fig:LBMvisu}, $\pi$ are the probabilities for a row node to belong to
|
||||
the row block of corresponding color, $\rho$ are the probabilities for a column
|
||||
node to belong to the column block of corresponding color and $\alpha$ are the
|
||||
In Figure~\ref{fig:LBMvisu}, $\bm{\pi}$ are the probabilities for a row node to belong to
|
||||
the row block of corresponding color, $\bm{\rho}$ are the probabilities for a column
|
||||
node to belong to the column block of corresponding color and $\bm{\alpha}$ are the
|
||||
connectivity parameters between the row and column blocks.
|
||||
|
||||
This model can be used to easily generate bipartite graphs with complex and very
|
||||
varied structures. But when trying to determine the structure of a given network
|
||||
we need to find those parameters.
|
||||
we need to find those parameters, and the row and column block memberships are
|
||||
\emph{latent} i.e.,\ they are not known and must be inferred.
|
||||
|
||||
For this a common approach is to use a VEM algorithm
|
||||
(proposed for SBM in~\cite{daudinMixtureModelRandom2008} and for LBM in~\cite{govaertEMAlgorithmBlock2005})
|
||||
|
|
@ -316,7 +319,7 @@ We define a collection of bipartite networks as $\bm{X} = (X^1, \dots, X^M)$
|
|||
the collection of incidence matrices. Moreover, all the networks in the collection
|
||||
have the same type of interaction (e.g., all interactions are binary).
|
||||
|
||||
\section{Separate BiSBM (sepBiSBM)}\label{sec:separate-bisbm-sepbisbm}
|
||||
\section{Separate BiSBM (sep-BiSBM)}\label{sec:separate-bisbm-sepbisbm}
|
||||
|
||||
A first approach to deal with a collection of networks is to adjust separate
|
||||
BiSBM for each network of the collection.
|
||||
|
|
@ -531,9 +534,6 @@ $\bm{\tau}$. \\
|
|||
% \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta})
|
||||
% \end{equation*}
|
||||
|
||||
|
||||
% TODO Develop the formula
|
||||
|
||||
The VEM algorithm alternates between two steps, the variational E step and the M step.
|
||||
The E step consists in optimizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ for a
|
||||
current value of $\bm{\theta}$ with respect to $\bm{\tau}$. And the M step
|
||||
|
|
@ -1060,8 +1060,9 @@ We illustrate our capacity to perform a partition of a collection for all
|
|||
colBiSBM models in Section~\ref{sec:network-clustering-of-simulated-networks}.
|
||||
|
||||
\chapter{Simulation studies}\label{chap:simulation-studies}
|
||||
\include{Rcodes/simulation/inference_analyze}
|
||||
\include{Rcodes/simulation/model_selection_analyze}
|
||||
\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
|
||||
\include{Rcodes/simulation/netclustering_analyze}
|
||||
|
||||
\chapter{Applications}
|
||||
\include{Rcodes/real_data/application_dore_data}
|
||||
|
|
|
|||
|
|
@ -198,6 +198,25 @@
|
|||
file = {/home/polarolouis/Zotero/storage/6F8YT8AD/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/7DSZ3KD9/Holland et al. - 1983 - Stochastic blockmodels First steps.pdf;/home/polarolouis/Zotero/storage/DUL2RV8Q/holland1983.pdf.pdf;/home/polarolouis/Zotero/storage/G9KZBG9W/0378873383900217.html}
|
||||
}
|
||||
|
||||
@article{hubertComparingPartitions1985,
|
||||
title = {Comparing Partitions},
|
||||
author = {Hubert, Lawrence and Arabie, Phipps},
|
||||
date = {1985-12-01},
|
||||
journaltitle = {Journal of Classification},
|
||||
shortjournal = {Journal of Classification},
|
||||
volume = {2},
|
||||
number = {1},
|
||||
pages = {193--218},
|
||||
issn = {1432-1343},
|
||||
doi = {10.1007/BF01908075},
|
||||
url = {https://doi.org/10.1007/BF01908075},
|
||||
urldate = {2023-07-04},
|
||||
abstract = {The problem of comparing two different partitions of a finite set of objects reappears continually in the clustering literature. We begin by reviewing a well-known measure of partition correspondence often attributed to Rand (1971), discuss the issue of correcting this index for chance, and note that a recent normalization strategy developed by Morey and Agresti (1984) and adopted by others (e.g., Miligan and Cooper 1985) is based on an incorrect assumption. Then, the general problem of comparing partitions is approached indirectly by assessing the congruence of two proximity matrices using a simple cross-product measure. They are generated from corresponding partitions using various scoring rules. Special cases derivable include traditionally familiar statistics and/or ones tailored to weight certain object pairs differentially. Finally, we propose a measure based on the comparison of object triples having the advantage of a probabilistic interpretation in addition to being corrected for chance (i.e., assuming a constant value under a reasonable null hypothesis) and bounded between ±1.},
|
||||
langid = {english},
|
||||
keywords = {Consensus indices,Measures of agreement,Measures of association},
|
||||
file = {/home/polarolouis/Zotero/storage/7TKW7HEM/Hubert et Arabie - 1985 - Comparing partitions.pdf}
|
||||
}
|
||||
|
||||
@article{kaszewska-gilasGlobalStudiesHostParasite2021,
|
||||
title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}},
|
||||
author = {Kaszewska-Gilas, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue