From 4d7cd3c469a3ace40bfdf5ca4a1b3dbdf07d3b57 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 26 May 2025 16:36:28 +0200 Subject: [PATCH] Ajout retours PS --- annexe.tex | 10 +++++ principal.tex | 113 +++++++++++++------------------------------------ references.bib | 19 +++++++++ 3 files changed, 58 insertions(+), 84 deletions(-) diff --git a/annexe.tex b/annexe.tex index 2409da0..ac8d626 100644 --- a/annexe.tex +++ b/annexe.tex @@ -76,6 +76,15 @@ Thus $\ell(\bY;\theta) - \KL{\Ryt}{\log \Prob(\bZ,\bW|\bY;\theta)} = \mathcal{J}(\tau;\theta) \qed$ \end{frame} +\begin{frame} + \frametitle{On the BIC-L} + \begin{align*} + & \text{ICL}(\hat{\theta}) = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\log p(\mathbf{Y}|\mathbf{Z},\mathbf{W};\hat{\theta})] - \frac{1}{2} \text{pen}(\dots) \\ + & \text{BIC}(\hat{\theta}) = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\log p(\mathbf{Y}|\mathbf{Z},\mathbf{W};\hat{\theta})] + \mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) - \frac{1}{2} \text{pen}(\dots) \\ + & \text{BIC-L}(\hat{\theta}, \hat{\tau}) = \Esp_{\mathcal{R}_{\mathbf{Y}, \hat{\tau}}}[\log \ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y}, \hat{\tau}}) - \frac{1}{2} \text{pen}(\dots) \\ + \end{align*} +\end{frame} + \section{Model selection} \begin{frame} @@ -112,6 +121,7 @@ \end{column} \end{columns} \end{frame} + \begin{frame} \frametitle{Choice of $(Q_1,Q_2)$ - Sliding window} \begin{columns} diff --git a/principal.tex b/principal.tex index f7c0b83..95dad86 100644 --- a/principal.tex +++ b/principal.tex @@ -1,6 +1,5 @@ \section{Model Context} \label{sec:context-of-the-model} - \begin{frame} \frametitle{Why a network?} \begin{columns} @@ -55,6 +54,7 @@ \item node level: degree, centrality\dots \item network level: density, nestedness\dots \end{itemize} + \cite{kolaczykStatisticalAnalysisNetwork2009} \item \textbf<2>{Node embedding and/or clustering with latent variable models} 
\\\cite{snijdersEstimationPredictionStochastic1997,hoffLatentSpaceApproaches2002} \item Node or network embedding with Graph Convolutional Networks @@ -64,8 +64,7 @@ \begin{frame} \addtocounter{footnote}{1} - \frametitle{Latent Block Model (LBM\footnotemark[\thefootnote])} - \cite{govaertEMAlgorithmBlock2005}. + \frametitle{Bipartite Stochastic Block Model (BiSBM\footnotemark[\thefootnote])}\framesubtitle{\cite{govaertEMAlgorithmBlock2005}} \begin{columns} \begin{column}{0.40\linewidth} \begin{figure}[H] @@ -108,7 +107,7 @@ \end{column}} \end{columns} - \footnotetext[\thefootnote]{Which I will henceforth call BiSBM} + \footnotetext[\thefootnote]{Commonly known as \emph{Latent Block Model} (LBM) in the literature.} \end{frame} \begin{frame} @@ -160,11 +159,11 @@ \label{sec:extension-of-colsbm-to-bipartite-networks} \begin{frame} \frametitle{Model 0: sep-BiSBM} - \footnotesize - $ - \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2>{^m}, Q_2\alert<2>{^m}, \pi\alert<2>{^m}, \rho\alert<2>{^m}, \alpha\alert<2>{^m}) - $ - \onslide<3>{ + \only<1-2>{ + \begin{equation*} + \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2->{^m}, Q_2\alert<2->{^m}, \pi\alert<2->{^m}, \rho\alert<2->{^m}, \alpha\alert<2->{^m}) + \end{equation*}} + \only<3>{ \begin{figure}[ht] \centering \begin{subfigure}[ht]{0.42\textwidth} @@ -188,6 +187,7 @@ \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf} \caption{Reading} \end{subfigure} + \vspace{-\baselineskip} \caption{Reordered adjacency matrices, using BiSBM for each network} \label{fig:adj-reord} \end{figure} @@ -249,32 +249,32 @@ % \end{alertblock}} % \end{frame} - +\section{Inference and model selection} +\label{sec:inference-and-model-selection} \begin{frame}{Parameter estimation} By \emph{Variational EM}, as proposed by~\cite{daudinMixtureModelRandom2008, 
chabert-liddellLearningCommonStructures2024}. \begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$} - $\mathcal{R}_{Y^m,\tau}(\mathbf{Z}^m, \mathbf{W}^m) = - \mathcal{R}^1_{Y^m,\tau}(\mathbf{Z}^m) + $\mathcal{R}_{Y^m,\tau}(Z^m, W^m) = + \mathcal{R}^1_{Y^m,\tau}(Z^m) {\color{red}\times} - \mathcal{R}^2_{Y^m,\tau}(\mathbf{W}^m) \Rightarrow$ independence rows, columns. + \mathcal{R}^2_{Y^m,\tau}(W^m) \Rightarrow$ independence between rows and columns. \end{block} \begin{multline*} \ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg( \color{black} \mathcal{Q}^m(\theta\mid\theta^{(t)}) + \mathcal{H}(\mathcal{R}_{Y^m,\theta^{(t)}} - (\mathbf{Z}^m, \mathbf{W}^m)) + (Z^m, W^m)) \color{red}\bigg) \color{black} \eqcolon \mathcal{J}(\tau;\theta) \end{multline*} where $\mathcal{Q}^m(\theta\mid\theta^{(t)}) = - \mathbb{E}_{\mathbf{Z}^m,\mathbf{W}^m + \mathbb{E}_{Z^m,W^m \sim \mathcal{R}_{Y^m,\tau}(.)} - \left[ \ell_c(Y^m,\mathbf{Z}^m,\mathbf{W}^m | \theta) \right] \,$ + \left[ \ell_c(Y^m,Z^m,W^m | \theta) \right] \,$ \end{frame} -\section{Model selection} \begin{frame} \frametitle{Problem of choosing $(Q_1, Q_2)$} Need to select $Q_1$ and $Q_2$. BIC-Like criterion\footnote{ICL + entropy - penalty} @@ -289,7 +289,7 @@ \item Exploration of a 2D grid is costly. \uncover<2->{$\rightarrow$ \textbf{Greedy approach} and \textbf{sliding window}} \item Sensitivity to initializations. 
\uncover<3->{$\rightarrow$ \textbf{Spectral - clustering} and \textbf{reuse of previous inits}} + clustering} and \textbf{split \& merge} approach} \end{itemize} \end{alertblock} \end{frame} @@ -328,25 +328,26 @@ \begin{frame} \frametitle{Results~\cite{baldockSystemsApproachReveals2019} focus on Leeds} + \captionsetup{font=normalsize} \begin{figure}[ht] \centering \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf} - \caption{Separate model} + \caption{Leeds with sep-BiSBM} \end{subfigure}\hfill \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=1\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} - \caption{Joint model} + \caption{Leeds with \emph{iid}-colBiSBM} \end{subfigure} - \caption{Reordered adjacency matrix by sep-BiSBM (left) and by \emph{iid}-colBiSBM (right),~\cite{baldockSystemsApproachReveals2019}} \end{figure} \end{frame} \begin{frame}{\emph{Bombus}} \only<1>{ \begin{figure} + \captionsetup{font=normalsize} \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=0.5\textwidth]{img/baldock/bombus-hortorum.jpeg} @@ -420,66 +421,21 @@ } \end{frame} -\begin{frame} - \frametitle{Network clustering} - \begin{figure}[ht] - \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2011_TB+Baldock2011_JN.pdf} - \caption{Adjacency matrix,~\cite{baldockDailyTemporalStructure2011}} - \end{figure} -\end{frame} +\section{Extension and conclusion} \begin{frame}[allowframebreaks] \frametitle{Application to~\cite{baldockDailyTemporalStructure2011, baldockSystemsApproachReveals2019}} - TODO pivot or remove slide + TODO Put $\alpha$ plots and tree structure of partition \begin{figure}[t] \centering - \begin{subfigure}{0.5\textwidth} - \centering - \includegraphics[scale=0.1]{backup-app-iid-struct1.png} - \includegraphics[scale=0.2]{backup-app-iid-struct2.png} - \caption{Model $iid$,\\ - separate African 
(left) and English (right) networks} - \end{subfigure}% - ~ - \begin{subfigure}{0.5\textwidth} - \centering - \includegraphics[scale=0.2]{backup-app-pirho-struct.png} - \caption{Model $\pi\rho$,\\ - merge African and English networks} - \end{subfigure}% - \caption{Structures detected for networks - of~\cite{baldockDailyTemporalStructure2011, - baldockSystemsApproachReveals2019}} + \includegraphics[scale=0.1]{backup-app-iid-struct1.png} + \includegraphics[scale=0.2]{backup-app-iid-struct2.png} + \caption{Model $iid$, separate African (left) and English (right) networks} \end{figure} \end{frame} - -\begin{frame}{Results} - \begin{figure}[ht] - \centering - \begin{subfigure}{0.5\textwidth} - \centering - \includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2011_TB+Baldock2011_JN.pdf} - \caption{Reordered by LBM} - \end{subfigure}\hfil - \begin{subfigure}{0.5\textwidth} - \centering - \includegraphics[width=1\textwidth]{tikz/applications/baldock/pirho-colbisbm-mat-Baldock2011_TB+Baldock2011_JN.pdf} - \caption{Reordered by $\pi\rho$-colBiSBM} - \end{subfigure} - - \caption{Reordered adjacency matrix by $\pi\rho$-colBiSBM,~\cite{baldockDailyTemporalStructure2011}} - \end{figure} -\end{frame} - -\section{Conclusion} -\label{sec:conclusion} \begin{frame} \frametitle{Conclusion and perspectives} - % DONE Add a conclusion perspective slide - % Recall models with clustering - % Mention analysis of corrected networks for sampling - % Link to the package \begin{block}{Capabilities} \begin{itemize} \item 4 models including 3 with flexibility on at least one of @@ -488,21 +444,10 @@ \item Partition a set of networks according to their structures. 
\end{itemize} \end{block} - -\end{frame} - -\begin{frame}{Perspectives} - - \begin{block}{Future work} - \begin{itemize} - \item Multi-layer networks (account for sampling bias, presence/absence) - \item Graph Convolutional Network to allow for scalability - \end{itemize} - \end{block} - \begin{block}{Package and applications} \begin{itemize} - \item CRAN submission + \item \texttt{arXiv} preprint in preparation + \item \texttt{CRAN} submission \item Integrate the possibility of an additional criterion for clustering (e.g. urbanization gradient~\cite{fisogniSeasonalTrajectoriesPlantpollinator2022}) \item Apply clustering to data from diff --git a/references.bib b/references.bib index 0bf297d..d01d845 100644 --- a/references.bib +++ b/references.bib @@ -708,6 +708,25 @@ Read\_Status\_Date: 2025-05-09T11:54:37.094Z}, file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5THEWLW6/Kipf et Welling - 2016 - Variational Graph Auto-Encoders.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/BBTHQNRZ/1611.html} } +@book{kolaczykStatisticalAnalysisNetwork2009, + title = {Statistical {{Analysis}} of {{Network Data}}: {{Methods}} and {{Models}}}, + shorttitle = {Statistical {{Analysis}} of {{Network Data}}}, + author = {Kolaczyk, Eric D.}, + date = {2009}, + series = {Springer {{Series}} in {{Statistics}}}, + publisher = {Springer New York}, + location = {New York, NY}, + doi = {10.1007/978-0-387-88146-1}, + url = {https://link.springer.com/10.1007/978-0-387-88146-1}, + urldate = {2025-05-26}, + isbn = {978-0-387-88145-4 978-0-387-88146-1}, + langid = {english}, + keywords = {/unread}, + annotation = {Read\_Status: New\\ +Read\_Status\_Date: 2025-05-26T11:42:27.939Z}, + file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RQPMHFGB/Kolaczyk - 2009 - Statistical Analysis of Network Data Methods and Models.pdf} +} + @online{kumpulainenYourBlockOur2024, title = {From Your {{Block}} to Our {{Block}}: {{How}} to {{Find Shared Structure}} between {{Stochastic Block 
Models}} over {{Multiple Graphs}}}, shorttitle = {From Your {{Block}} to Our {{Block}}},