appendix : adding proof of ident results

2024-08-18 14:47:37 +02:00 · 2024-08-18 14:47:37 +02:00 · 6d6652375d
commit 6d6652375d
parent a4cc5deec8
4 changed files with 128 additions and 75 deletions
--- a/rapport/appendices.tex
+++ b/rapport/appendices.tex
@ -1,7 +1,57 @@
 \clearpage
 \pagenumbering{arabic}% resets `page` counter to 1
-\renewcommand*{\thepage}{A-\arabic{page}}
+\renewcommand*{\thepage}{S-\arabic{page}}
 \appendix
+
+\chapter{Supplementary for~\nameref{chap:struct-detection}}
+\section{Proof of the identifiability result}
+\label{sec:proof-identifiability}
+
+We recall the following result.
+\def\thetheorem{\ref{thm:identifiability-iid}}
+\begin{theorem}[Identifiability of $iid$-colBiSBM]
+    The parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ are
+    identifiable up to a label switching of the blocks if those
+    conditions are achieved:
+    \begin{itemize}
+        \item[(1.1)] $\exists m^*\in\{1,\dots,M\} : n^1_{m^*} \geq 2 Q_2 - 1~\text{and}~n^2_{m^*} \geq 2 Q_1 - 1$.
+        \item[(1.2)] $\forall 1\leq q \leq Q_1, \pi_q > 0$
+            and the coordinates of vector $\bm{\rho}
+                {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$).
+        \item[(1.3)] $\forall 1\leq r \leq Q_2, \rho_r > 0$
+            and the coordinates of vector $\bm{\pi}
+                X^{m^*}$ are distinct.
+    \end{itemize}
+\end{theorem}
+\begin{proof}
+    Following the approach of~\cite{chabert-liddellLearningCommonStructures2024a},
+    we derive the result stated in Properties~\ref{thm:identifiability-iid}.
+
+    \cite{keribinEstimationSelectionLatent2015} building
+    on~\cite{celisseConsistencyMaximumlikelihoodVariational2012}, proved that the
+    parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ of the
+    $\mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1^m, Q_2^m, \bm{\pi^m}, \bm{\rho^m}, \bm{\alpha^m})$
+    are identifiable from the observation of network $X^m$ when $\mathcal{F}$
+    is the Bernoulli distribution and the following conditions are met:
+    \begin{enumerate}
+        \item $ n_1^m \geq 2 Q_2^m - 1~\text{and}~n_2^m \geq 2 Q_1^m - 1$.
+        \item $\forall 1\leq q \leq Q_1^m, \pi_q^m > 0$
+              and the coordinates of vector $\bm{\rho^m}
+                  {X^{m}}^T$ are distinct (where ${X^{m}}^T$ is the transpose of $X^{m}$).
+        \item $\forall 1\leq r \leq Q_2^m, \rho_r^m > 0$
+              and the coordinates of vector $\bm{\pi^m}
+                  X^{m}$ are distinct.
+    \end{enumerate}
+
+    Under the \emph{iid}-colBiSBM model, for all $m=1,\dots,M$,
+    $X^m \sim \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2,
+        \bm{\pi}, \bm{\rho}, \bm{\alpha})$. This means that
+    following~\cite{keribinEstimationSelectionLatent2015}, the
+    identifiability of $\bm{\alpha}$, $\bm{\pi}$ and $\bm{\rho}$ is obtained
+    from the distribution of $X^{m^*}$ under assumptions (1.1), (1.2) and
+    (1.3).
+\end{proof}
+
 \chapter{Supplementary for~\nameref{chap:simulation-studies}}
 Below are the supplementary material for the~\nameref{chap:simulation-studies}.

@ -31,14 +81,18 @@ Please note that blank space indicates that among all conditions
 the corresponding model was not selected at all.

 \begin{landscape}
-    \pagestyle{empty}
    \input{../tables/simulations/model_selection/model-selection.tex}
 \end{landscape}
-\pagestyle{fancy}
+
+

 \chapter{Supplementary for~\nameref{chap:applications-ecological-networks}}
 \section{Additional information on~\nameref{sec:baldock-clustering}}

+\pagestyle{fancy}
+
+\renewcommand*{\thepage}{S-\arabic{page}}
+
 Due to report size limitations we included these plots here as they are not crucial to understand what is going on in
 the section~\ref{sec:baldock-clustering}.
 Yet they are useful to confirm the explanation given.
--- a/rapport/chapter3-structure-detection.tex
+++ b/rapport/chapter3-structure-detection.tex
@ -1,5 +1,6 @@
 \addtocounter{customchapter}{1}
 \chapter[Structure detection in bipartite collection]{Structure detection in a collection of bipartite networks}
+\label{chap:struct-detection}
 \section{Definition of a collection}
 \label{sec:definition-of-a-collection}

@ -351,49 +352,49 @@ We provide below the expression for the penalties for the 4 models that we
 propose.
 \begin{description}
    \item[\textit{iid}-colBiSBM] For the $\bm\pi$ and $\bm\rho$:
-          \begin{align*}
-              \text{pen}_{\pi}(Q_1) = (Q_1 - 1)\log(\sum_{m=1}^{M}n_{1}^{m}) & , &
-              \text{pen}_{\rho}(Q_2) = (Q_2 - 1)\log(\sum_{m=1}^{M}n_{2}^{m})
-          \end{align*}
-          For the $\bm\alpha$:
-          \[\text{pen}_{\alpha}(Q_1, Q_2) = Q_1 \times Q_2 \log(N_M)\]
-          with
-          \[ N_M = \sum_{m = 1}^{M} n_{1}^{m} \times n_{2}^{m} \]
-          And thus the $\text{BIC-L}$ formula is the following:
-          \[ \text{BIC-L}(\bm{X},Q_1, Q_2) = \max_{\theta}
-              \mathcal{J} (\mathcal{\hat{R}}, \bm{\theta})
-              - \frac{1}{2} [\text{pen}_{\pi}(Q_1) + \text{pen}_{\rho}(Q_2) +
-                  \text{pen}_{\alpha}(Q_1, Q_2)]\]
+        \begin{align*}
+            \text{pen}_{\pi}(Q_1) = (Q_1 - 1)\log(\sum_{m=1}^{M}n_{1}^{m}) & , &
+            \text{pen}_{\rho}(Q_2) = (Q_2 - 1)\log(\sum_{m=1}^{M}n_{2}^{m})
+        \end{align*}
+        For the $\bm\alpha$:
+        \[\text{pen}_{\alpha}(Q_1, Q_2) = Q_1 \times Q_2 \log(N_M)\]
+        with
+        \[ N_M = \sum_{m = 1}^{M} n_{1}^{m} \times n_{2}^{m} \]
+        And thus the $\text{BIC-L}$ formula is the following:
+        \[ \text{BIC-L}(\bm{X},Q_1, Q_2) = \max_{\theta}
+            \mathcal{J} (\mathcal{\hat{R}}, \bm{\theta})
+            - \frac{1}{2} [\text{pen}_{\pi}(Q_1) + \text{pen}_{\rho}(Q_2) +
+                \text{pen}_{\alpha}(Q_1, Q_2)]\]
    \item[$\bm{\pi\rho}$-colBiSBM] The support penalties are
-          \begin{align*}
-              \text{pen}_{S_1}(Q_1) = -2 \log p_{Q_1} (S_1) & , &
-              \text{pen}_{S_2}(Q_2) = -2 \log p_{Q_2} (S_2)
-          \end{align*}
-          with \begin{align*}
-              \textstyle \log p_{Q_1}(S_1) = - M \log(Q_1) - \sum_{m=1}^{M} \log {Q_1
-              \choose Q_1^{(m)}}, \\
-              \textstyle \log p_{Q_2}(S_2) = - M \log(Q_2) - \sum_{m=1}^{M} \log {Q_2
-              \choose Q_2^{(m)}}.
-          \end{align*}
-          And penalties for the $\bm\rho$ and $\bm\pi$ are
-          \[ \text{pen}_{\pi}(Q_1, S_1) = \sum_{m=1}^{M} (Q_{1}^{(m)} - 1)
-              \log n_{1}^{m},
-              ~\text{pen}_{\rho}(Q_2, S_2) = \sum_{m=1}^{M} (Q_{2}^{(m)} - 1)
-              \log n_{2}^{m}. \]
-          Penalties for the $\bm\alpha$
-          \[ \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) = (\sum_{q=1}^{Q_1}
-              \sum_{r=1}^{Q_2} \mathbbb{1}_{(S_1)'S_2 > 0}) \log (N_M). \]
-          And the corresponding BIC-L formula,
-          \[
-              \begin{aligned}
-                  \text{BIC-L}(\bm{X},Q_1, Q_2) =
-                  \max_{S_1,S_2} [
-                                & \max_{\theta_{S_1,S_2} \in \Theta_{S_1,S_2}} \mathcal{J}(\mathcal{\hat{R}},\theta_{S_1,S_2}) \\
-                  - \frac{1}{2} & (\text{pen}_{\pi}(Q_1, S_1)  + \text{pen}_{\rho}(Q_2, S_2)                                   \\
-                                & + \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2)                                                    \\
-                                & + \text{pen}_{S_1}(Q_1) + \text{pen}_{S_2}(Q_2))]                                            \\
-              \end{aligned}
-          \]
+        \begin{align*}
+            \text{pen}_{S_1}(Q_1) = -2 \log p_{Q_1} (S_1) & , &
+            \text{pen}_{S_2}(Q_2) = -2 \log p_{Q_2} (S_2)
+        \end{align*}
+        with \begin{align*}
+            \textstyle \log p_{Q_1}(S_1) = - M \log(Q_1) - \sum_{m=1}^{M} \log {Q_1
+            \choose Q_1^{(m)}}, \\
+            \textstyle \log p_{Q_2}(S_2) = - M \log(Q_2) - \sum_{m=1}^{M} \log {Q_2
+            \choose Q_2^{(m)}}.
+        \end{align*}
+        And penalties for the $\bm\rho$ and $\bm\pi$ are
+        \[ \text{pen}_{\pi}(Q_1, S_1) = \sum_{m=1}^{M} (Q_{1}^{(m)} - 1)
+            \log n_{1}^{m},
+            ~\text{pen}_{\rho}(Q_2, S_2) = \sum_{m=1}^{M} (Q_{2}^{(m)} - 1)
+            \log n_{2}^{m}. \]
+        Penalties for the $\bm\alpha$
+        \[ \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2) = (\sum_{q=1}^{Q_1}
+            \sum_{r=1}^{Q_2} \mathbbb{1}_{(S_1)'S_2 > 0}) \log (N_M). \]
+        And the corresponding BIC-L formula,
+        \[
+            \begin{aligned}
+                \text{BIC-L}(\bm{X},Q_1, Q_2) =
+                \max_{S_1,S_2} [
+                              & \max_{\theta_{S_1,S_2} \in \Theta_{S_1,S_2}} \mathcal{J}(\mathcal{\hat{R}},\theta_{S_1,S_2}) \\
+                - \frac{1}{2} & (\text{pen}_{\pi}(Q_1, S_1)  + \text{pen}_{\rho}(Q_2, S_2)                                   \\
+                              & + \text{pen}_{\alpha}(Q_1, Q_2, S_1, S_2)                                                    \\
+                              & + \text{pen}_{S_1}(Q_1) + \text{pen}_{S_2}(Q_2))]                                            \\
+            \end{aligned}
+        \]
 \end{description}

 \subsection{Initialization and pairing of the models}
@ -708,7 +709,7 @@ And the pairwise dissimilarity for networks $(m,m')\in\mathcal{M}^2$ is then:

 \begin{figure}[t]
    \centering
-    \begin{tikzpicture}
+    \begin{tikzpicture}[scale=0.7]
        \tikzstyle{instruct}=[font=\small, text justified, rectangle,draw,fill=yellow!50]
        \tikzstyle{first_col}=[rectangle, text justified, draw,fill=gray!50]
        \tikzstyle{second_col}=[scale=0.55, circle, draw,fill=red!50]
@ -751,12 +752,10 @@ trivial partition in a unique group.
 Then using the \emph{Kmeans} we split the collection in two sub-collections
 with the dissimilarity matrix. The two sub-collections are fitted and we
 compute the score of this new partition $\mathcal{G}^{*} = \{G_1, G_2\}$.
-
 If $Sc(\mathcal{G}^{*}) > Sc(\mathcal{G})$ then we repeat the same procedure on
 $G_1$ and $G_2$. Else we return $\mathcal{G}$.
-
 We illustrate our capacity to perform a partition of a collection for all
-colBiSBM models in %\ref{sec:network-clustering-of-simulated-networks}.
+colBiSBM models in Section~\ref{sec:network-clustering-of-simulated-networks}.

 \section{Model identifiability}
 \label{sec:model-identifiability}
@ -764,7 +763,7 @@ colBiSBM models in %\ref{sec:network-clustering-of-simulated-networks}.
 The goal here is to prove that if $\ell(\bm{X};\bm{\theta}) = \ell(\bm{X};\bm{\theta}')$ for any collection $\bm{X}$ then $\bm{\theta} = \bm{\theta}'$.

 Following the proof proposed by~\cite{chabert-liddellLearningCommonStructures2024a}, that adapted it to the collection case and~\cite{keribinEstimationSelectionLatent2015} that extended the result of~\cite{celisseConsistencyMaximumlikelihoodVariational2012} to the LBM Bernoulli model,
-we obtain the following proof of identifiability for the $iid$-colBiSBM:
+we obtain the following result of identifiability\footnote{The proof is in Appendix~\ref{sec:proof-identifiability}.} for the $iid$-colBiSBM:
 \begin{theorem}[Identifiability of $iid$-colBiSBM]
    \label{thm:identifiability-iid}
    The parameters $(\bm{\pi}, \bm{\rho}, \bm{\alpha})$ are
@ -773,11 +772,11 @@ we obtain the following proof of identifiability for the $iid$-colBiSBM:
    \begin{itemize}
        \item[(1.1)] $\exists m^*\in\{1,\dots,M\} : n^1_{m^*} \geq 2 Q_2 - 1~\text{and}~n^2_{m^*} \geq 2 Q_1 - 1$.
        \item[(1.2)] $\forall 1\leq q \leq Q_1, \pi_q > 0$
-              and the coordinates of vector $\bm{\rho}
-                  {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$).
+            and the coordinates of vector $\bm{\rho}
+                {X^{m^*}}^T$ are distinct (where ${X^{m^*}}^T$ is the transpose of $X^{m^*}$).
        \item[(1.3)] $\forall 1\leq r \leq Q_2, \rho_r > 0$
-              and the coordinates of vector $\bm{\pi}
-                  X^{m^*}$ are distinct.
+            and the coordinates of vector $\bm{\pi}
+                X^{m^*}$ are distinct.
    \end{itemize}
 \end{theorem}

--- a/rapport/rapport.pdf
+++ b/rapport/rapport.pdf
--- a/rapport/rapport.tex
+++ b/rapport/rapport.tex
@ -26,7 +26,7 @@
 	hypertexnames=true
 }

-\newtheorem{theorem}{Theorem}
+\newtheorem{theorem}{Properties}
 \usepackage{tocbibind} % Pour avoir des index pour table des matières, biblio
 \usepackage{geometry}
 \geometry{bmargin=25mm}
@ -223,31 +223,31 @@ automata,positioning}
 % Pour activer les onglets
 \ActivateBG
 \begin{selectlanguage}{french}
-	% \maketitle
-	\pagenumbering{roman}
-	\tableofcontents
-	\include{remerciements}
-	% \include{chapter1-presentation_UMR}
+    % \maketitle
+    \pagenumbering{roman}
+    \tableofcontents
+    \include{remerciements}
+    % \include{chapter1-presentation_UMR}
 \end{selectlanguage}

 \begin{selectlanguage}{english}
-	\pagenumbering{arabic}
-	\include{chapter2-context}
-	\include{chapter3-structure-detection}
-	\include{chapter4-simulation-studies}
+    \pagenumbering{arabic}
+    \include{chapter2-context}
+    \include{chapter3-structure-detection}
+    \include{chapter4-simulation-studies}

-	% \chapter{Applications}
-	% \include{Rcodes/real_data/application_dore}
-	% \include{Rcodes/real_data/CoOPLBM_completion_analyze}
-	\include{chapter5-applications}
-	\include{conclusions}
+    % \chapter{Applications}
+    % \include{Rcodes/real_data/application_dore}
+    % \include{Rcodes/real_data/CoOPLBM_completion_analyze}
+    \include{chapter5-applications}
+    \include{conclusions}

-	\addtocounter{maincontentend}{1}
-	\addtocounter{customchapter}{1}
-	\printbibliography
-	\input{appendices.tex}
-	% \listoffigures
-	% \listoftables
+    \addtocounter{maincontentend}{1}
+    \addtocounter{customchapter}{1}
+    \printbibliography
+    \input{appendices.tex}
+    % \listoffigures
+    % \listoftables
 \end{selectlanguage}

 \end{document}