diff --git a/rapport/appendices.tex b/rapport/appendices.tex index 5491011..159fea0 100644 --- a/rapport/appendices.tex +++ b/rapport/appendices.tex @@ -1,7 +1,57 @@ \clearpage \pagenumbering{arabic}% resets `page` counter to 1 -\renewcommand*{\thepage}{A-\arabic{page}} +\renewcommand*{\thepage}{S-\arabic{page}} \appendix + +\chapter{Supplementary for~\nameref{chap:struct-detection}} +\section{Proof of the identifiability result} +\label{sec:proof-identifiability} + +We recall the following result. +\def\thetheorem{\ref{thm:identifiability-iid}} +\begin{theorem}[Identifiability of $iid$-colBiSBM] + The parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ are + identifiable up to a label switching of the blocks if those + conditions are achieved: + \begin{itemize} + \item[(1.1)] $\exists m^*\in\{1,\dots,M\} : n^1_{m^*} \geq 2 Q_2 - 1~\text{and}~n^2_{m^*} \geq 2 Q_1 - 1$. + \item[(1.2)] $\forall 1\leq q \leq Q_1, \pi_q > 0$ + and the coordinates of vector $\bm{\rho} + {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$). + \item[(1.3)] $\forall 1\leq r \leq Q_2, \rho_r > 0$ + and the coordinates of vector $\bm{\pi} + X^{m^*}$ are distinct. + \end{itemize} +\end{theorem} +\begin{proof} + Following the tracks of~\cite{chabert-liddellLearningCommonStructures2024a} + we derive the result in Properties~\ref{thm:identifiability-iid}. + + \cite{keribinEstimationSelectionLatent2015}, building + on~\cite{celisseConsistencyMaximumlikelihoodVariational2012}, proved that the + parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ of the + $\mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1^m, Q_2^m, \bm{\pi^m}, \bm{\rho^m}, \bm{\alpha^m})$ + are identifiable from the observation of network $X^m$ when $\mathcal{F}$ + is the Bernoulli distribution and the following conditions are met: + \begin{enumerate} + \item $ n_1^m \geq 2 Q_2^m - 1~\text{and}~n_2^m \geq 2 Q_1^m - 1$. 
+ \item $\forall 1\leq q \leq Q_1^m, \pi_q^m > 0$ + and the coordinates of vector $\bm{\rho^m} + {X^{m}}^T$ are distinct (where ${X^{m}}^T$ is the transpose of $X^{m}$). + \item $\forall 1\leq r \leq Q_2^m, \rho_r^m > 0$ + and the coordinates of vector $\bm{\pi^m} + X^{m}$ are distinct. + \end{enumerate} + + Under the \emph{iid}-colBiSBM model, for all $m=1,\dots,M$, + $X^m \sim \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, + \bm{\pi}, \bm{\rho}, \bm{\alpha})$. This means that, + following~\cite{keribinEstimationSelectionLatent2015}, the + identifiability of $\bm{\alpha}$, $\bm{\pi}$ and $\bm{\rho}$ is obtained + from the distribution of $X^{m^*}$ under assumptions (1.1), (1.2) and + (1.3). +\end{proof} + \chapter{Supplementary for~\nameref{chap:simulation-studies}} Below are the supplementary material for the~\nameref{chap:simulation-studies}. @@ -31,14 +81,18 @@ Please note that blank space indicates that among all conditions the corresponding model was not selected at all. \begin{landscape} - \pagestyle{empty} \input{../tables/simulations/model_selection/model-selection.tex} \end{landscape} -\pagestyle{fancy} + + \chapter{Supplementary for~\nameref{chap:applications-ecological-networks}} \section{Additional information on~\nameref{sec:baldock-clustering}} +\pagestyle{fancy} + +\renewcommand*{\thepage}{S-\arabic{page}} + Due to report size limitations we included these plots here as they are not crucial to understand what is going on in the section~\ref{sec:baldock-clustering}. Yet they are useful to confirm the explanation given. 
diff --git a/rapport/chapter3-structure-detection.tex b/rapport/chapter3-structure-detection.tex index 835966e..16cdd14 100644 --- a/rapport/chapter3-structure-detection.tex +++ b/rapport/chapter3-structure-detection.tex @@ -1,5 +1,6 @@ \addtocounter{customchapter}{1} \chapter[Structure detection in bipartite collection]{Structure detection in a collection of bipartite networks} +\label{chap:struct-detection} \section{Definition of a collection} \label{sec:definition-of-a-collection} @@ -351,49 +352,49 @@ We provide below the expression for the penalties for the 4 models that we propose. \begin{description} \item[\textit{iid}-colBiSBM] For the $\bm\pi$ and $\bm\rho$: - \begin{align*} - \text{pen}_{\pi}(Q_1) = (Q_1 - 1)\log(\sum_{m=1}^{M}n_{1}^{m}) & , & - \text{pen}_{\rho}(Q_2) = (Q_2 - 1)\log(\sum_{m=1}^{M}n_{2}^{m}) - \end{align*} - For the $\bm\alpha$: - \[\text{pen}_{\alpha}(Q_1, Q_2) = Q_1 \times Q_2 \log(N_M)\] - with - \[ N_M = \sum_{m = 1}^{M} n_{1}^{m} \times n_{2}^{m} \] - And thus the $\text{BIC-L}$ formula is the following: - \[ \text{BIC-L}(\bm{X},Q_1, Q_2) = \max_{\theta} - \mathcal{J} (\mathcal{\hat{R}}, \bm{\theta}) - - \frac{1}{2} [\text{pen}_{\pi}(Q_1) + \text{pen}_{\rho}(Q_2) + - \text{pen}_{\alpha}(Q_1, Q_2)]\] + \begin{align*} + \text{pen}_{\pi}(Q_1) = (Q_1 - 1)\log(\sum_{m=1}^{M}n_{1}^{m}) & , & + \text{pen}_{\rho}(Q_2) = (Q_2 - 1)\log(\sum_{m=1}^{M}n_{2}^{m}) + \end{align*} + For the $\bm\alpha$: + \[\text{pen}_{\alpha}(Q_1, Q_2) = Q_1 \times Q_2 \log(N_M)\] + with + \[ N_M = \sum_{m = 1}^{M} n_{1}^{m} \times n_{2}^{m} \] + And thus the $\text{BIC-L}$ formula is the following: + \[ \text{BIC-L}(\bm{X},Q_1, Q_2) = \max_{\theta} + \mathcal{J} (\mathcal{\hat{R}}, \bm{\theta}) + - \frac{1}{2} [\text{pen}_{\pi}(Q_1) + \text{pen}_{\rho}(Q_2) + + \text{pen}_{\alpha}(Q_1, Q_2)]\] \item[$\bm{\pi\rho}$-colBiSBM] The support penalties are - \begin{align*} - \text{pen}_{S_1}(Q_1) = -2 \log p_{Q_1} (S_1) & , & - \text{pen}_{S_2}(Q_2) = -2 \log p_{Q_2} 
(S_2) - \end{align*} - with \begin{align*} - \textstyle \log p_{Q_1}(S_1) = - M \log(Q_1) - \sum_{m=1}^{M} \log {Q_1 - \choose Q_1^{(m)}}, \\ - \textstyle \log p_{Q_2}(S_2) = - M \log(Q_2) - \sum_{m=1}^{M} \log {Q_2 - \choose Q_2^{(m)}}. - \end{align*} - And penalties for the $\bm\rho$ and $\bm\pi$ are - \[ \text{pen}_{\pi}(Q_1, S_1) = \sum_{m=1}^{M} (Q_{1}^{(m)} - 1) - \log n_{1}^{m}, - ~\text{pen}_{\rho}(Q_2, S_2) = \sum_{m=1}^{M} (Q_{2}^{(m)} - 1) - \log n_{2}^{m}. \] - Penalties for the $\bm\alpha$ - \[ \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) = (\sum_{q=1}^{Q_1} - \sum_{r=1}^{Q_2} \mathbbb{1}_{(S_1)'S_2 > 0}) \log (N_M). \] - And the corresponding BIC-L formula, - \[ - \begin{aligned} - \text{BIC-L}(\bm{X},Q_1, Q_2) = - \max_{S_1,S_2} [ - & \max_{\theta_{S_1,S_2} \in \Theta_{S_1,S_2}} \mathcal{J}(\mathcal{\hat{R}},\theta_{S_1,S_2}) \\ - - \frac{1}{2} & (\text{pen}_{\pi}(Q_1, S_1) + \text{pen}_{\rho}(Q_2, S_2) \\ - & + \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) \\ - & + \text{pen}_{S_1}(Q_1) + \text{pen}_{S_2}(Q_2))] \\ - \end{aligned} - \] + \begin{align*} + \text{pen}_{S_1}(Q_1) = -2 \log p_{Q_1} (S_1) & , & + \text{pen}_{S_2}(Q_2) = -2 \log p_{Q_2} (S_2) + \end{align*} + with \begin{align*} + \textstyle \log p_{Q_1}(S_1) = - M \log(Q_1) - \sum_{m=1}^{M} \log {Q_1 + \choose Q_1^{(m)}}, \\ + \textstyle \log p_{Q_2}(S_2) = - M \log(Q_2) - \sum_{m=1}^{M} \log {Q_2 + \choose Q_2^{(m)}}. + \end{align*} + And penalties for the $\bm\rho$ and $\bm\pi$ are + \[ \text{pen}_{\pi}(Q_1, S_1) = \sum_{m=1}^{M} (Q_{1}^{(m)} - 1) + \log n_{1}^{m}, + ~\text{pen}_{\rho}(Q_2, S_2) = \sum_{m=1}^{M} (Q_{2}^{(m)} - 1) + \log n_{2}^{m}. \] + Penalties for the $\bm\alpha$ + \[ \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) = (\sum_{q=1}^{Q_1} + \sum_{r=1}^{Q_2} \mathbbb{1}_{(S_1)'S_2 > 0}) \log (N_M). 
\] + And the corresponding BIC-L formula, + \[ + \begin{aligned} + \text{BIC-L}(\bm{X},Q_1, Q_2) = + \max_{S_1,S_2} [ + & \max_{\theta_{S_1,S_2} \in \Theta_{S_1,S_2}} \mathcal{J}(\mathcal{\hat{R}},\theta_{S_1,S_2}) \\ + - \frac{1}{2} & (\text{pen}_{\pi}(Q_1, S_1) + \text{pen}_{\rho}(Q_2, S_2) \\ + & + \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) \\ + & + \text{pen}_{S_1}(Q_1) + \text{pen}_{S_2}(Q_2))] \\ + \end{aligned} + \] \end{description} \subsection{Initialization and pairing of the models} @@ -708,7 +709,7 @@ And the pairwise dissimilarity for networks $(m,m')\in\mathcal{M}^2$ is then: \begin{figure}[t] \centering - \begin{tikzpicture} + \begin{tikzpicture}[scale=0.7] \tikzstyle{instruct}=[font=\small, text justified, rectangle,draw,fill=yellow!50] \tikzstyle{first_col}=[rectangle, text justified, draw,fill=gray!50] \tikzstyle{second_col}=[scale=0.55, circle, draw,fill=red!50] @@ -751,12 +752,10 @@ trivial partition in a unique group. Then using the \emph{Kmeans} we split the collection in two sub-collections with the dissimilarity matrix. The two sub-collections are fitted and we compute the score of this new partition $\mathcal{G}^{*} = \{G_1, G_2\}$. - If $Sc(\mathcal{G}^{*}) > Sc(\mathcal{G})$ then we repeat the same procedure on $G_1$ and $G_2$. Else we return $\mathcal{G}$. - We illustrate our capacity to perform a partition of a collection for all -colBiSBM models in %\ref{sec:network-clustering-of-simulated-networks}. +colBiSBM models in Section~\ref{sec:network-clustering-of-simulated-networks}. \section{Model identifiability} \label{sec:model-identifiability} @@ -764,7 +763,7 @@ colBiSBM models in %\ref{sec:network-clustering-of-simulated-networks}. The goal here is to prove that if $\ell(\bm{X};\bm{\theta}) = \ell(\bm{X};\bm{\theta}')$ for any collection $\bm{X}$ then $\bm{\theta} = \bm{\theta}'$. 
Following the proof proposed by~\cite{chabert-liddellLearningCommonStructures2024a}, that adapted it to the collection case and~\cite{keribinEstimationSelectionLatent2015} that extended the result of~\cite{celisseConsistencyMaximumlikelihoodVariational2012} to the LBM Bernoulli model, -we obtain the following proof of identifiability for the $iid$-colBiSBM: +we obtain the following result of identifiability\footnote{The proof is given in Appendix~\ref{sec:proof-identifiability}.} for the $iid$-colBiSBM: \begin{theorem}[Identifiability of $iid$-colBiSBM] \label{thm:identifiability-iid} The parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ are @@ -773,11 +772,11 @@ we obtain the following proof of identifiability for the $iid$-colBiSBM: \begin{itemize} \item[(1.1)] $\exists m^*\in\{1,\dots,M\} : n^1_{m^*} \geq 2 Q_2 - 1~\text{and}~n^2_{m^*} \geq 2 Q_1 - 1$. \item[(1.2)] $\forall 1\leq q \leq Q_1, \pi_q > 0$ - and the coordinates of vector $\bm{\rho} - {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$). + and the coordinates of vector $\bm{\rho} + {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$). \item[(1.3)] $\forall 1\leq r \leq Q_2, \rho_r > 0$ - and the coordinates of vector $\bm{\pi} - X^{m^*}$ are distinct. + and the coordinates of vector $\bm{\pi} + X^{m^*}$ are distinct. 
\end{itemize} \end{theorem} diff --git a/rapport/rapport.pdf b/rapport/rapport.pdf index 5e0bf88..d2115f2 100644 Binary files a/rapport/rapport.pdf and b/rapport/rapport.pdf differ diff --git a/rapport/rapport.tex b/rapport/rapport.tex index 6f3465c..60ccf34 100644 --- a/rapport/rapport.tex +++ b/rapport/rapport.tex @@ -26,7 +26,7 @@ hypertexnames=true } -\newtheorem{theorem}{Theorem} +\newtheorem{theorem}{Properties} \usepackage{tocbibind} % Pour avoir des index pour table des matières, biblio \usepackage{geometry} \geometry{bmargin=25mm} @@ -223,31 +223,31 @@ automata,positioning} % Pour activer les onglets \ActivateBG \begin{selectlanguage}{french} - % \maketitle - \pagenumbering{roman} - \tableofcontents - \include{remerciements} - % \include{chapter1-presentation_UMR} + % \maketitle + \pagenumbering{roman} + \tableofcontents + \include{remerciements} + % \include{chapter1-presentation_UMR} \end{selectlanguage} \begin{selectlanguage}{english} - \pagenumbering{arabic} - \include{chapter2-context} - \include{chapter3-structure-detection} - \include{chapter4-simulation-studies} + \pagenumbering{arabic} + \include{chapter2-context} + \include{chapter3-structure-detection} + \include{chapter4-simulation-studies} - % \chapter{Applications} - % \include{Rcodes/real_data/application_dore} - % \include{Rcodes/real_data/CoOPLBM_completion_analyze} - \include{chapter5-applications} - \include{conclusions} + % \chapter{Applications} + % \include{Rcodes/real_data/application_dore} + % \include{Rcodes/real_data/CoOPLBM_completion_analyze} + \include{chapter5-applications} + \include{conclusions} - \addtocounter{maincontentend}{1} - \addtocounter{customchapter}{1} - \printbibliography - \input{appendices.tex} - % \listoffigures - % \listoftables + \addtocounter{maincontentend}{1} + \addtocounter{customchapter}{1} + \printbibliography + \input{appendices.tex} + % \listoffigures + % \listoftables \end{selectlanguage} \end{document}