diff --git a/rapport/chapter4-simulations/inference.tex b/rapport/chapter4-simulations/inference.tex index ddfb310..8582a52 100644 --- a/rapport/chapter4-simulations/inference.tex +++ b/rapport/chapter4-simulations/inference.tex @@ -1,9 +1,10 @@ \section{Efficiency of the inference} +\label{sec:efficiency-of-the-inference} The goal here is to assess the quality of the inference procedure. \paragraph{Simulation settings} For this simulation the data is simulated with -$M = 2, n_{1}^{m} = 120, n_{2}^{m} = 120, Q_1 = Q_2 = 4$, $\bm{\alpha}, \bm{\pi}$ +$M = 2, n_{1}^{m} = 120,~n_{2}^{m} = 120,~Q_1 = Q_2 = 4$, $\bm{\alpha}, \bm{\pi}$ and $\bm{\rho}$ are set as follows: \begin{align*} & \bm{\alpha} = .25 + @@ -76,7 +77,7 @@ use the following indicators: the real ones by computing the mean of the ARI per axis over the two networks \begin{equation*} - \overline{\text{ARI}}_d = \frac{1}{2} \text{ARI}\big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big), + \overline{\text{ARI}}_d = \frac{1}{2} \big( \text{ARI}(\widehat{\bm{Z}^1_d},\bm{Z}^1_d) + \text{ARI}(\widehat{\bm{Z}^2_d},\bm{Z}^2_d) \big), \end{equation*} where $d$ is the dimension or axis (i.e., rows, $d=1$, or columns, $d=2$) of the block memberships. @@ -88,20 +89,26 @@ use the following indicators: \end{itemize} All these quality indicators are averaged over the 108 datasets. The results are -provided in the tables \ref{tab:per_model_sep} to \ref{tab:per_model_pirho}. Each line corresponds to the -108 datasets for a given value of $\eps[\alpha]$. +provided in tables~\ref{tab:inference_results_iid} to~\ref{tab:inference_results_pirho}. Each line corresponds to the +108 datasets for a given value of $\eps[\alpha]$. Graphical representations of +some results are shown in figures~\ref{fig:inference-prop-modele-pref} +and~\ref{fig:inference-ari-plots}. 
\begin{figure}[ht] \centering \input{../tikz/simulations/inference/model-proportions.tex} \caption{Preferred model proportions over all datasets in function of $\eps[\alpha]$} - \label{fig:prop-modele-pref} + \label{fig:inference-prop-modele-pref} \end{figure} -\foreach \modelname in {sep, iid, pi, rho, pirho}{ - \input{../tables/simulations/inference/\modelname.tex} - } +\begin{figure}[H] + \centering + \input{../tikz/simulations/inference/ari-plots} + \caption{Plot of the ARI quality indicators as a function of + $\eps[\alpha]$} + \label{fig:inference-ari-plots} +\end{figure} \paragraph{Results} For the model comparison, when $\eps[\alpha]$ is small @@ -109,14 +116,12 @@ For the model comparison, when $\eps[\alpha]$ is small Erd\H{o}s-Reńyi network, and it is very hard to find any structure beyond the one of a single block on each dimension. -On the figure \ref{fig:inference-proportion-preferred} and table -\ref{tab:proportion-preferred-table} we can see that from -$\eps[\alpha] = 0.06$ around $70\%$ of the time the $\pi\rho\text{-}colBiSBM$ -model (i.e., the correct one) is selected. +In figure~\ref{fig:inference-prop-modele-pref} one can see that from +$\eps[\alpha] = 0.06$ onwards, around $70\%$ of the time the +$\pi\rho\text{-}colBiSBM$ model (i.e., the correct one) is selected. An interesting result we can read in the tables is that our models outperform the $sep\text{-}BiSBM$ when considering the ARI on the whole set of nodes ($\text{ARI}_d$). This means that our models are able to recover the block pairing \emph{between the networks} in addition to recovering the blocks and -their parameters. -\clearpage \ No newline at end of file +their parameters. 
\ No newline at end of file diff --git a/rapport/chapter4-simulations/information-transfer.tex b/rapport/chapter4-simulations/information-transfer.tex index 52b932c..51cd89f 100644 --- a/rapport/chapter4-simulations/information-transfer.tex +++ b/rapport/chapter4-simulations/information-transfer.tex @@ -4,5 +4,6 @@ between the networks, allowing robustness to missing data and enabling the finding of finer structures in small networks with the help of bigger ones. \subsection{Missing edges robustness} +\input{chapter4-simulations/na-robustness} \subsection{Finer structure detection on small networks} \ No newline at end of file diff --git a/rapport/chapter4-simulations/model-selection.tex b/rapport/chapter4-simulations/model-selection.tex index 3ef008e..6f89b94 100644 --- a/rapport/chapter4-simulations/model-selection.tex +++ b/rapport/chapter4-simulations/model-selection.tex @@ -64,7 +64,7 @@ $\pi\rho\text{-}colBiSBM$. function of $\eps[\pi]$ and $\eps[\rho]$} \end{figure} -\paragraph{Results:} +\paragraph{Results} On the figure \ref{fig:pref_model_func_eps} and table \ref{tab:model-selection}, one can see that there is a turning point around $\eps[\pi] = 0.2$ (resp. diff --git a/rapport/chapter4-simulations/na-robustness.tex b/rapport/chapter4-simulations/na-robustness.tex new file mode 100644 index 0000000..f9ffe98 --- /dev/null +++ b/rapport/chapter4-simulations/na-robustness.tex @@ -0,0 +1,62 @@ +\paragraph{Simulation settings} We want to compare the performance of retrieving +the nodes' block memberships when some edges are missing (labeled as \texttt{NA} in the +incidence matrix). 
+ +For this purpose we generate collections of networks with the following +parameters: +\begin{align*} + \bm{\pi}^m = \begin{cases} + \bm{\pi} = \left( 0.5, 0.3, 0.2 \right) & \text{for } iid\text{-}colBiSBM \\ + \sigma_1^m(\bm{\pi}) & \text{for } \pi\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM + \end{cases} \\ + \bm{\rho}^m = + \begin{cases} + \bm{\rho} = \left( 0.5, 0.3, 0.2 \right) & \text{for } iid\text{-}colBiSBM \\ + \sigma_2^m(\bm{\rho}) & \text{for } \rho\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM, + \end{cases} +\end{align*} +for the block proportions, and two different structures with the corresponding +$\bm{\alpha}$, +\begin{align*} + \bm{\alpha}^{modular} = \begin{pmatrix} + 0.9 & 0.05 & 0.05 \\ + 0.05 & 0.2 & 0.05 \\ + 0.05 & 0.05 & 0.8 + \end{pmatrix}, & + \bm{\alpha}^{nested} = \begin{pmatrix} + 0.9 & 0.25 & 0.1 \\ + 0.3 & 0.15 & 0.05 \\ + 0.1 & 0.05 & 0.05 + \end{pmatrix}, +\end{align*} +% +where $\bm{\alpha}^{modular}$ represents networks where there are look-alike +communities, which tend to interact preferentially within the community and +less with the other communities, and $\bm{\alpha}^{nested}$ represents a common +structure detected in ecology with generalist and specialist species and a +\enquote{nested} structure. + +The collections contain two networks of size $n^{m=1}_1 = n^{m=1}_2 = 40$ and +$n^{m=2}_1 = n^{m=2}_2 = 120$. One collection is generated for each $colBiSBM$ +model, and the nodes' block memberships (i.e., the row and column blocks they +belong to) are saved. + +In the network $m=1$ (i.e., the smaller one) a proportion $p_{\texttt{NA}}$ of +the edges see their values replaced by \texttt{NA}s; the +\enquote{forgotten} values are stored. + +\paragraph{Test procedure} An LBM is fitted on the first network, and the +predicted block memberships are saved, along with the predicted links using the +inferred parameters. This will serve as a baseline to see if the use of the +collection benefits the predictions. 
+ +A $colBiSBM$ model is then fitted (with a model matching the dataset considered) +and we store the same predictions. + +\paragraph{Quality metrics} To benchmark the performance we use the +\emph{Area Under the Curve} (AUC) for predicted versus real link values and the +ARI for predicted versus real block memberships. + +\paragraph{Results} + + diff --git a/rapport/chapter4-simulations/network-clustering.tex b/rapport/chapter4-simulations/network-clustering.tex index 1ff887d..5d8b353 100644 --- a/rapport/chapter4-simulations/network-clustering.tex +++ b/rapport/chapter4-simulations/network-clustering.tex @@ -1,28 +1,30 @@ +\clearpage \section{Network clustering of simulated networks} \label{sec:network-clustering-of-simulated-networks} -\paragraph{Simulation settings} For all models we simulate $M = 9$ networks with -$\forall m \in \{ 1 \dots M \} , n^m_1 = n^m_2 = 75$ with $Q_1 = Q_2 = 3$. For -the simulations the proportions are the following: +\paragraph{Simulation settings} For all models we simulate $M = 9$ networks +with~$\forall m \in \{ 1 \dots M \} , n^m_1 = n^m_2 = 75$ with $Q_1 = Q_2 = 3$.\newline +For the simulations the proportions are the following: \begin{align*} \bm{\pi}^1 = \left( 0.2, 0.3, 0.5 \right) & & \bm{\rho}^1 = \left( 0.2, 0.3, 0.5 \right) \\ \end{align*} and for all $m = 2,\dots,9$ \begin{align*} \bm{\pi}^m = \begin{cases} \bm{\pi}^1 & \text{for } iid\text{-}colBiSBM \\ - \sigma^1_m(\bm{\pi}^1) & \text{for } \pi\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM + \sigma_1^m(\bm{\pi}^1) & \text{for } \pi\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM \end{cases} \\ \bm{\rho}^m = \begin{cases} \bm{\rho}^1 & \text{for } iid\text{-}colBiSBM \\ - \sigma^2_m(\bm{\rho}^1) & \text{for } \rho\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM + \sigma_2^m(\bm{\rho}^1) & \text{for } \rho\text{-}colBiSBM \text{ and } \pi\rho\text{-}colBiSBM \end{cases} \end{align*} -where $\sigma^1_m$ and $\sigma^2_m$ are permutations of {1, 2, 3} 
proper to network $m$ and -$\sigma^1 (\pi)= {(\pi_{\sigma^1 (i)})}_{i=\{1,\dots,3\}}$ -and $\sigma^2 (\rho)= {(\rho_{\sigma^2 (i)})}_{i=\{1,\dots,3\}}$. -The networks are divided into 3 sub-collections of 3 -networks with connectivity parameters as follows: +where $\sigma_1^m$ and $\sigma_2^m$ are permutations of $\{1, 2, 3\}$ specific to network $m$ and +$\sigma_1^m (\pi)= {(\pi_{\sigma_1^m (i)})}_{i \in \{1,\dots,3\}}$ +and $\sigma_2^m (\rho)= {(\rho_{\sigma_2^m (i)})}_{i \in \{1,\dots,3\}}$. + +The networks are divided into 3 sub-collections of 3 networks with connectivity +parameters as follows: \begin{align*} \bm{\alpha}^{as} = .3 + \begin{pmatrix} \epsilon & - \frac{\epsilon}{2} & - \frac{\epsilon}{2} \\ @@ -50,6 +52,13 @@ matrices are equal and the 9 networks have the same connection structure. Increasing $\epsilon$ differentiates the 3 sub-collections of networks. % ARI boxplot +\begin{figure}[H] + \centering + \input{../tikz/simulations/clustering/ari-clustering.tex} + \caption{ARI obtained for the clustering with the different models as a + function of $\epsilon$} + \label{fig:ari-clustering-boxplot} +\end{figure} \paragraph{Results} The evaluation of our method involves a comparison between the resulting partition of the network collection and the simulated partition diff --git a/rapport/rapport.pdf b/rapport/rapport.pdf index 6f53ee0..1d8ac74 100644 Binary files a/rapport/rapport.pdf and b/rapport/rapport.pdf differ