Ajout retours PS

This commit is contained in:
Louis 2025-05-26 16:36:28 +02:00
parent be08b1bab5
commit 4d7cd3c469
3 changed files with 58 additions and 84 deletions

View file

@ -76,6 +76,15 @@
Thus $\ell(\bY;\theta) - \KL{\Ryt}{\Prob(\bZ,\bW|\bY;\theta)} = \mathcal{J}(\tau;\theta) \qed$
\end{frame}
\begin{frame}
\frametitle{On the BIC-L}
\begin{align*}
& \text{ICL}(\hat{\theta}) = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\log p(\mathbf{Y}|\mathbf{Z},\mathbf{W};\hat{\theta})] - \frac{1}{2} \text{pen}(\dots) \\
& \text{BIC}(\hat{\theta}) = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\log p(\mathbf{Y}|\mathbf{Z},\mathbf{W};\hat{\theta})] + \mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) - \frac{1}{2} \text{pen}(\dots) \\
& \text{BIC-L}(\hat{\theta}, \hat{\tau}) = \Esp_{\mathcal{R}_{\mathbf{Y}, \hat{\tau}}}[\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y}, \hat{\tau}}) - \frac{1}{2} \text{pen}(\dots)
\end{align*}
\end{frame}
\section{Model selection}
\begin{frame}
@ -112,6 +121,7 @@
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Choice of $(Q_1,Q_2)$ - Sliding window}
\begin{columns}

View file

@ -1,6 +1,5 @@
\section{Model Context}
\label{sec:context-of-the-model}
\begin{frame}
\frametitle{Why a network?}
\begin{columns}
@ -55,6 +54,7 @@
\item node level: degree, centrality\dots
\item network level: density, nestedness\dots
\end{itemize}
\cite{kolaczykStatisticalAnalysisNetwork2009}
\item \textbf<2>{Node embedding and/or clustering with latent variable models}
\\\cite{snijdersEstimationPredictionStochastic1997,hoffLatentSpaceApproaches2002}
\item Node or network embedding with Graph Convolutional Networks
@ -64,8 +64,7 @@
\begin{frame}
\addtocounter{footnote}{1}
\frametitle{Latent Block Model (LBM\footnotemark[\thefootnote])}
\cite{govaertEMAlgorithmBlock2005}.
\frametitle{Bipartite Stochastic Block Model (BiSBM\footnotemark[\thefootnote])}\framesubtitle{\cite{govaertEMAlgorithmBlock2005}}
\begin{columns}
\begin{column}{0.40\linewidth}
\begin{figure}[H]
@ -108,7 +107,7 @@
\end{column}}
\end{columns}
\footnotetext[\thefootnote]{Which I will henceforth call BiSBM}
\footnotetext[\thefootnote]{Commonly known as \emph{Latent Block Model} (LBM) in the literature.}
\end{frame}
\begin{frame}
@ -160,11 +159,11 @@
\label{sec:extension-of-colsbm-to-bipartite-networks}
\begin{frame}
\frametitle{Model 0: sep-BiSBM}
\footnotesize
$
\forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2>{^m}, Q_2\alert<2>{^m}, \pi\alert<2>{^m}, \rho\alert<2>{^m}, \alpha\alert<2>{^m})
$
\onslide<3>{
\only<1-2>{
\begin{equation*}
\forall m \in \{1,\dots,M\}, Y^m \overset{\text{ind}}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2->{^m}, Q_2\alert<2->{^m}, \pi\alert<2->{^m}, \rho\alert<2->{^m}, \alpha\alert<2->{^m})
\end{equation*}}
\only<3>{
\begin{figure}[ht]
\centering
\begin{subfigure}[ht]{0.42\textwidth}
@ -188,6 +187,7 @@
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf}
\caption{Reading}
\end{subfigure}
\vspace{-\baselineskip}
\caption{Reordered adjacency matrices, using BiSBM for each network}
\label{fig:adj-reord}
\end{figure}
@ -249,32 +249,32 @@
% \end{alertblock}}
% \end{frame}
\section{Inference and model selection}
\label{sec:inference-and-model-selection}
\begin{frame}{Parameter estimation}
By \emph{Variational EM}, as proposed
by~\cite{daudinMixtureModelRandom2008,
chabert-liddellLearningCommonStructures2024}.
\begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$}
$\mathcal{R}_{Y^m,\tau}(\mathbf{Z}^m, \mathbf{W}^m) =
\mathcal{R}^1_{Y^m,\tau}(\mathbf{Z}^m)
$\mathcal{R}_{Y^m,\tau}(Z^m, W^m) =
\mathcal{R}^1_{Y^m,\tau}(Z^m)
{\color{red}\times}
\mathcal{R}^2_{Y^m,\tau}(\mathbf{W}^m) \Rightarrow$ independence rows, columns.
\mathcal{R}^2_{Y^m,\tau}(W^m) \Rightarrow$ independence between rows and columns.
\end{block}
\begin{multline*}
\ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg(
\color{black} \mathcal{Q}^m(\theta\mid\theta^{(t)}) +
\mathcal{H}(\mathcal{R}_{Y^m,\theta^{(t)}}
(\mathbf{Z}^m, \mathbf{W}^m))
(Z^m, W^m))
\color{red}\bigg) \color{black}
\eqcolon \mathcal{J}(\tau;\theta)
\end{multline*}
where $\mathcal{Q}^m(\theta\mid\theta^{(t)}) =
\mathbb{E}_{\mathbf{Z}^m,\mathbf{W}^m
\mathbb{E}_{Z^m,W^m
\sim \mathcal{R}_{Y^m,\tau}(.)}
\left[ \ell_c(Y^m,\mathbf{Z}^m,\mathbf{W}^m | \theta) \right] \,$
\left[ \ell_c(Y^m,Z^m,W^m | \theta) \right] \,$
\end{frame}
\section{Model selection}
\begin{frame}
\frametitle{Problem of choosing $(Q_1, Q_2)$}
Need to select $Q_1$ and $Q_2$. BIC-Like criterion\footnote{ICL + entropy - penalty}
@ -289,7 +289,7 @@
\item Exploration of a 2D grid is costly. \uncover<2->{$\rightarrow$ \textbf{Greedy
approach} and \textbf{sliding window}}
\item Sensitivity to initializations. \uncover<3->{$\rightarrow$ \textbf{Spectral
clustering} and \textbf{reuse of previous inits}}
clustering} and \textbf{split \& merge} approach}
\end{itemize}
\end{alertblock}
\end{frame}
@ -328,25 +328,26 @@
\begin{frame}
\frametitle{Results~\cite{baldockSystemsApproachReveals2019} focus on Leeds}
\captionsetup{font=normalsize}
\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf}
\caption{Separate model}
\caption{Leeds with sep-BiSBM}
\end{subfigure}\hfill
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf}
\caption{Joint model}
\caption{Leeds with \emph{iid}-colBiSBM}
\end{subfigure}
\caption{Reordered adjacency matrix by sep-BiSBM (left) and by \emph{iid}-colBiSBM (right),~\cite{baldockSystemsApproachReveals2019}}
\end{figure}
\end{frame}
\begin{frame}{\emph{Bombus}}
\only<1>{
\begin{figure}
\captionsetup{font=normalsize}
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{img/baldock/bombus-hortorum.jpeg}
@ -420,66 +421,21 @@
}
\end{frame}
\begin{frame}
\frametitle{Network clustering}
\begin{figure}[ht]
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2011_TB+Baldock2011_JN.pdf}
\caption{Adjacency matrix,~\cite{baldockDailyTemporalStructure2011}}
\end{figure}
\end{frame}
\section{Extension and conclusion}
\begin{frame}[allowframebreaks]
\frametitle{Application to~\cite{baldockDailyTemporalStructure2011,
baldockSystemsApproachReveals2019}}
TODO pivot or remove slide
TODO Put $\alpha$ plots and tree structure of partition
\begin{figure}[t]
\centering
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[scale=0.1]{backup-app-iid-struct1.png}
\includegraphics[scale=0.2]{backup-app-iid-struct2.png}
\caption{Model $iid$,\\
separate African (left) and English (right) networks}
\end{subfigure}%
~
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[scale=0.2]{backup-app-pirho-struct.png}
\caption{Model $\pi\rho$,\\
merge African and English networks}
\end{subfigure}%
\caption{Structures detected for networks
of~\cite{baldockDailyTemporalStructure2011,
baldockSystemsApproachReveals2019}}
\caption{Model $iid$, separate African (left) and English (right) networks}
\end{figure}
\end{frame}
\begin{frame}{Results}
\begin{figure}[ht]
\centering
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2011_TB+Baldock2011_JN.pdf}
\caption{Reordered by LBM}
\end{subfigure}\hfil
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/pirho-colbisbm-mat-Baldock2011_TB+Baldock2011_JN.pdf}
\caption{Reordered by $\pi\rho$-colBiSBM}
\end{subfigure}
\caption{Reordered adjacency matrix by $\pi\rho$-colBiSBM,~\cite{baldockDailyTemporalStructure2011}}
\end{figure}
\end{frame}
\section{Conclusion}
\label{sec:conclusion}
\begin{frame}
\frametitle{Conclusion and perspectives}
% DONE Add a conclusion perspective slide
% Recall models with clustering
% Mention analysis of corrected networks for sampling
% Link to the package
\begin{block}{Capabilities}
\begin{itemize}
\item 4 models including 3 with flexibility on at least one of
@ -488,21 +444,10 @@
\item Partition a set of networks according to their structures.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}{Perspectives}
\begin{block}{Future work}
\begin{itemize}
\item Multi-layer networks (account for sampling bias, presence/absence)
\item Graph Convolutional Network to allow for scalability
\end{itemize}
\end{block}
\begin{block}{Package and applications}
\begin{itemize}
\item CRAN submission
\item \texttt{arXiv} preprint in preparation
\item \texttt{CRAN} submission
\item Integrate the possibility of an additional criterion for clustering (e.g.
urbanization gradient~\cite{fisogniSeasonalTrajectoriesPlantpollinator2022})
\item Apply clustering to data from

View file

@ -708,6 +708,25 @@ Read\_Status\_Date: 2025-05-09T11:54:37.094Z},
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/5THEWLW6/Kipf et Welling - 2016 - Variational Graph Auto-Encoders.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/BBTHQNRZ/1611.html}
}
@book{kolaczykStatisticalAnalysisNetwork2009,
title = {Statistical {{Analysis}} of {{Network Data}}: {{Methods}} and {{Models}}},
shorttitle = {Statistical {{Analysis}} of {{Network Data}}},
author = {Kolaczyk, Eric D.},
date = {2009},
series = {Springer {{Series}} in {{Statistics}}},
publisher = {Springer New York},
location = {New York, NY},
doi = {10.1007/978-0-387-88146-1},
url = {https://link.springer.com/10.1007/978-0-387-88146-1},
urldate = {2025-05-26},
isbn = {978-0-387-88145-4 978-0-387-88146-1},
langid = {english},
keywords = {/unread},
annotation = {Read\_Status: New\\
Read\_Status\_Date: 2025-05-26T11:42:27.939Z},
file = {/home/louis/snap/zotero-snap/common/Zotero/storage/RQPMHFGB/Kolaczyk - 2009 - Statistical Analysis of Network Data Methods and Models.pdf}
}
@online{kumpulainenYourBlockOur2024,
title = {From Your {{Block}} to Our {{Block}}: {{How}} to {{Find Shared Structure}} between {{Stochastic Block Models}} over {{Multiple Graphs}}},
shorttitle = {From Your {{Block}} to Our {{Block}}},