presentation-colbisbm/principal.tex
2025-05-28 11:27:22 +02:00

492 lines
No EOL
23 KiB
TeX

\section{Model Context}
\label{sec:context-of-the-model}
\begin{frame}
\frametitle{Why a network?}
\begin{columns}
\begin{column}{0.55\textwidth}
\begin{columns}
\only<1>{
\begin{column}{0.5\textwidth}
\begin{figure}[ht]
\centering
\begin{tikzpicture}[scale=0.65]
\input{tikz/plantpollinatornetwork.tex}
\end{tikzpicture}
\caption{A toy network}
\label{fig:plants-pollin}
\end{figure}
\end{column}
\begin{column}{0.3\textwidth}
\centering
\begin{align*}
\begin{pmatrix}
1 & 0 & 1 \\
1 & 0 & 0 \\
1 & 0 & 0 \\
1 & 1 & 0
\end{pmatrix}
\end{align*}
\footnotesize
Associated bi-adjacency matrix
\end{column}
}
\only<2>{
\begin{column}{0.5\textwidth}
\begin{figure}[ht]
% \centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/graph-Baldock2019_Bristol.pdf}
\caption{Plant-pollinator network from
Bristol\newline\cite{baldockSystemsApproachReveals2019}}
\label{fig:bristol-network}
\end{figure}
\end{column}
\begin{column}{0.45\textwidth}
\centering
\begin{figure}
\includegraphics[width=0.7\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol.pdf}
\caption{Adjacency matrix of the network}
\end{figure}
\end{column}
}
\end{columns}
\end{column}
\begin{column}{0.4\textwidth}
\only<1>{
\begin{itemize}
\item A bipartite graph $G = (U,V,E)$
\item Can be encoded by a bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$
\end{itemize}}
\only<2>{
\begin{itemize}
\item Increasingly available
\item Modeling of various interactions, here ecosystems
\item Structure necessary for: biodiversity monitoring, robustness, risk of collapse
\end{itemize}}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Multiple networks}
\only<1>{\begin{figure}[ht]
\centering
\begin{subfigure}[ht]{0.475\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol.pdf}
\caption{Bristol}
\end{subfigure}
\begin{subfigure}[ht]{0.475\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh.pdf}
\caption{Edinburgh}
\end{subfigure}
\hfill
\begin{subfigure}[ht]{0.475\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds.pdf}
\caption{Leeds}
\end{subfigure}
\begin{subfigure}[ht]{0.475\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Reading.pdf}
\caption{Reading}
\end{subfigure}
\caption{Adjacency matrices,~\cite{baldockSystemsApproachReveals2019}}
\label{fig:adj}
\end{figure}}
\only<2>{
\addtocounter{figure}{1}
\begin{figure}[ht]
\centering
\begin{tikzpicture}
\tikzset{city/.style={circle, fill=white, draw=red, inner sep=0.65,minimum size=0}}
\node (map) at (0,0) {\includegraphics[width=0.35\textwidth]{img/uk-map.png}};
\node[city, pin={[pin edge={<-,thick}, pin distance = 25mm]180:Leeds}] (leeds) at (0.2,0.03) {};
\node[city, pin={[pin edge={<-,thick}, pin distance = 30mm]0:Edinburgh}] (edinburgh) at (-0.58,1.84) {};
\node[city, pin={[pin edge={<-,thick}, pin distance = 20mm]180:Bristol}] (bristol) at (-0.3,-1.82) {};
\node[city, pin={[pin edge={<-,thick}, pin distance = 20mm]0:Reading}] (reading) at (0.5,-1.82) {};
\end{tikzpicture}
\caption{Map of the four cities}
\end{figure}}
\end{frame}
\begin{frame}{Analysis methods for a network}
Several methods:
\begin{itemize}
\item Metrics at \begin{itemize}
\item node level: degree, centrality\dots
\item network level: density, nestedness\dots
\end{itemize}
\cite{kolaczykStatisticalAnalysisNetwork2009}
\item \textbf<2>{Node embedding and/or clustering with latent variable models}
\\\cite{snijdersEstimationPredictionStochastic1997,hoffLatentSpaceApproaches2002}
\item Node or network embedding with Graph Convolutional Networks
\\\cite{kipfVariationalGraphAutoEncoders2016a}
\end{itemize}
\end{frame}
\begin{frame}
\addtocounter{footnote}{1}
\frametitle{Bipartite Stochastic Block Model (BiSBM\footnotemark[\thefootnote])}\framesubtitle{\cite{govaertEMAlgorithmBlock2005}}
\begin{columns}
\begin{column}{0.40\linewidth}
\begin{figure}[H]
\centering
\begin{tikzpicture}[scale=0.35]
\input{tikz/lbm.tex}
\end{tikzpicture}
\caption{Example of LBM\footnotemark[\thefootnote]}
\label{fig:LBMvisu}
\end{figure}
\end{column}
\begin{column}{0.51\linewidth}
\begin{block}{Hierarchical model}
\vspace{-\baselineskip}
\begin{align*}
\forall q\in[\![ 1, Q_1]\!],\mathbb{P}(Z_i = q) = \pi_q \\
\forall r\in[\![ 1, Q_2]\!],\mathbb{P}(W_j = r) = \rho_r \\
Y_{ij} | Z_i = q, W_j = r \sim \mathcal{B}ern(\alpha_{q,r})
\end{align*}
where $|\pi| = Q_1, |\rho| = Q_2, |\alpha| = Q_1 \times Q_2$
\end{block}
\begin{block}{Concise BiSBM formula}
$Y \sim \mathcal{B}ern\text{-BiSBM}_{n_1,n_2}(Q_1, Q_2, \pi, \rho, \alpha)$
\end{block}
\end{column}
\end{columns}
\footnotetext[\thefootnote]{Commonly known as the \emph{Latent Block Model} (LBM) in the literature.}
\end{frame}
\begin{frame}
\frametitle{Model 0: sep-BiSBM}
\only<1-2>{
\begin{equation*}
\forall m \in \{1,\dots,M\}, Y^m \overset{ind}{\sim} \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2->{^m}, Q_2\alert<2->{^m}, \pi\alert<2->{^m}, \rho\alert<2->{^m}, \alpha\alert<2->{^m})
\end{equation*}}
\only<3>{
\begin{figure}[ht]
\centering
\begin{subfigure}[ht]{0.42\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf}
\caption{Bristol, $Q_1 = 3, Q_2 = 3$}
\end{subfigure}
\begin{subfigure}[ht]{0.42\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf}
\caption{Edinburgh, $Q_1 = 3, Q_2 = 3$}
\end{subfigure}
\hfill
\begin{subfigure}[ht]{0.42\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf}
\caption{Leeds, $Q_1 = 3, Q_2 = 2$}
\end{subfigure}
\begin{subfigure}[ht]{0.42\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf}
\caption{Reading, $Q_1 = 3, Q_2 = 3$}
\end{subfigure}
\vspace{-\baselineskip}
\caption{Reordered adjacency matrices, using BiSBM for each network}
\label{fig:adj-reord}
\end{figure}
}
\end{frame}
\section[Bipartite collection models]{Bipartite network collection models}
\label{sec:extension-of-colsbm-to-bipartite-networks}
\begin{frame}
\frametitle{Several joint models}
\onslide<1->{ \begin{block}{\emph{iid}-colBiSBM}
\[
\forall m \in \{1,\dots,M\}, Y^m \overset{iid}{\sim}
\mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi, \rho, \alpha)
\]
with $\theta = (\pi, \rho, \alpha)$.
\end{block}}
\onslide<2>{ \begin{block}{$\pi\rho$-colBiSBM}
\[
\forall m \in \{1,\dots,M\}, Y^m \overset{ind}{\sim}
\mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha)
\]
with $\theta = ((\pi\alert{^m})_{m=1,\dots, M}, (\rho\alert{^m})_{m=1,\dots,
M}, \alpha)$.
\end{block}
}
\begin{itemize}
\item No shared nodes across networks
\item Agnostic of structure
\end{itemize}
\end{frame}
% \begin{frame}
% \frametitle{Parameter estimation}
% % DONE say that tau i q m c' is the probability that Zim = q, approximation of the variational probability. Because we impose independence
% % By maximizing a variational lower bound of the
% % log-likelihood of the observed data.
% Maximizing the log-likelihood?
% \begin{block}{log-likelihood and complete log-likelihood}
% \[
% \ell(\bm{Y};\theta) = \sum_{\bm{Z,W}\in \bm{\mathcal{Z}\times\mathcal{W}}} \ell_c(\bm{Y}, \bm{Z}, \bm{W};\theta)
% \]
% with $\bm{\mathcal{Z}} = \{1,\dots,\alert<2>{Q_1}\}^{\alert<2>{n}},
% \bm{\mathcal{W}} = \{1,\dots,\alert<2>{Q_2}\}^{\alert<2>{n}}$
% \end{block}
% \uncover<3>{So, classic algorithm $\Rightarrow$
% \emph{Expectation-Maximization} (EM).}
% \end{frame}
% \begin{frame}
% \frametitle{By classic EM}
% At iteration $(t)$:
% \begin{itemize}
% \item[$\bullet$]\textbf{E Step}: calculate
% $$ \mathcal{Q}(\theta | \theta^{(t-1)}) = \mathbb E_{\alert<2>{\bm Z, \bm W | \bm Y, \theta^{(t-1)}} } \left[\ell_c(\bm Y, \bm W, \bm Z; \theta) \right] $$
% \item[$\bullet$]\textbf{M Step}:
% $$ \theta^{(t)} = \arg \max_{\theta} \mathcal{Q}(\theta | \theta^{(t-1)})$$
% \end{itemize}
% \uncover<2>{
% \begin{alertblock}{Problem for classic EM}
% Law of $\bm{Z,W|Y},\theta^{(t-1)}$ inaccessible
% \end{alertblock}}
% \end{frame}
\section{Inference and model selection}
\label{sec:inference-and-model-selection}
\begin{frame}{Parameter estimation}{How?}
\begin{align*}
\ell(\mathbf{Y};\theta) = & \sum_{m=1}^{M} \ell(Y^m;\theta) \\
= & \sum_{m=1}^{M} \log \int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} dZ^m dW^m \\
= & \sum_{m=1}^{M} \log\int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\
& \ell(Z^m;\pi) + \ell(W^m;\rho)\} dZ^m dW^m
% & = \sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{q=1}^{Q_1} Z_{iq} \log(\pi_q) + \sum_{j=1}^{n_2^m}\sum_{r=1}^{Q_2} W_{jr} \log(\rho_r) \\
% & + \sum_{i,j}\sum_{q,r} Z_{iq}W_{jr} \log \mathcal{B}ern(Y_{ij};\alpha_{qr})
\end{align*}
\onslide<3>{
We would like to use the Expectation--Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977}, but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to the dependence between row and column groups.}
\end{frame}
\begin{frame}{Parameter estimation}{Solution}
By \emph{Variational EM}, as proposed
by~\cite{daudinMixtureModelRandom2008} and adapted for joint simple networks
by~\cite{chabert-liddellLearningCommonStructures2024}.
\begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$}
$\mathcal{R}_{Y^m,\tau}(Z^m, W^m) =
\mathcal{R}^1_{Y^m,\tau}(Z^m)
{\color{red}\times}
\mathcal{R}^2_{Y^m,\tau}(W^m) \Rightarrow$ independence between rows and columns, mean field approximation.
\end{block}
\begin{multline*}
\ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg(
\color{black} \mathbb{E}_{\mathcal{R}_{Y^m,\tau}(Z^m,W^m)}
\left[ \ell_c(Y^m,Z^m,W^m ; \theta^{(t)}) \right] + \\
\mathcal{H}(\mathcal{R}_{Y^m,\tau}
(Z^m, W^m))
\color{red}\bigg) \color{black}
\eqcolon \mathcal{J}(\mathcal{R}_{\mathbf{Y},\tau};\theta)
\end{multline*}
where $\theta = (\pi, \rho, \alpha)$ for \emph{iid}-colBiSBM
\end{frame}
\begin{frame}{Selection criterion for $Q_1, Q_2$}
\cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL):
\begin{align*}
\text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2)
\end{align*} which favors low-entropy clusterings. Common in the SBM literature.
\onslide<2->{
\begin{align*}
\text{BIC-L}(\bm{Y}, & Q_1, Q_2) = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}; \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \textcolor{red}{\leq \log p(\mathbf{Y};\hat{\theta}^{\text{ML}})- \frac{1}{2}\text{pen}(Q_1, Q_2)}
\end{align*}
because we want fuzzier clustering.
}
\end{frame}
\begin{frame}{Practical problems of choosing $Q_1, Q_2$}
\begin{alertblock}{Exploration problems}
\begin{itemize}
\item Exploration of a 2D grid is costly. \uncover<2->{$\rightarrow$ \textbf{Greedy
approach} and \textbf{sliding window}}
\item Sensitivity to initializations. \uncover<3->{$\rightarrow$ \textbf{Spectral
clustering} and \textbf{split \& merge} approach}
\end{itemize}
\end{alertblock}
\end{frame}
\section{Application}
\label{sec:application}
\begin{frame}
\frametitle{Results~\cite{baldockSystemsApproachReveals2019}}
\only<1>{
\begin{figure}[ht]
\centering
\begin{tikzpicture}[every every node/.style={anchor=south west, inner sep=0pt}, x=1mm, y=1mm]
\node (struct) at (0,0) {\includegraphics[width=0.8\textwidth]{tikz/applications/baldock/shared-mixture-iid.pdf}};
\node[isosceles triangle,
isosceles triangle apex angle=10,
draw,
rotate=270,
shading = axis,
top color=blue!50,
bottom color=blue!1!white,
anchor=right corner, minimum height=25mm, label={[label distance = 2mm]180:Generalists}, label={[label distance = 2mm]0:Specialists}] (T) at ($(struct.east)+(1.25,8)$) {};
\end{tikzpicture}
\caption{Shared structure ($\alpha$ matrix) and proportions ($\pi$ and $\rho$) of the four networks}
\label{fig:shared-mixture}
\end{figure}}
\only<2>{\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf}
\caption{Bristol, $Q_1 = 3, Q_2 = 5$}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf}
\caption{Edinburgh, $Q_1 = 3, Q_2 = 5$}
\end{subfigure}
\newline
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf}
\caption{Leeds, $Q_1 = 3, Q_2 = 5$}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf}
\caption{Reading, $Q_1 = 3, Q_2 = 5$}
\end{subfigure}
\caption{Reordered adjacency matrices by \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019}}
\end{figure}}
\end{frame}
\begin{frame}
\frametitle{Focus on Leeds}
\captionsetup{font=normalsize}
\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf}
\caption{sep-BiSBM, $Q_1 = 3, Q_2 = 2$}
\end{subfigure}\hfill
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=1\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf}
\caption{\emph{iid}-colBiSBM, $Q_1 = 3, Q_2 = 5$}
\end{subfigure}
\end{figure}
\end{frame}
\begin{frame}{\emph{Bombus}}
\only<1>{
\begin{figure}
\captionsetup{font=normalsize}
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{img/baldock/bombus-hortorum.jpeg}
\caption{\emph{Bombus hortorum} or garden bumblebee}
\end{subfigure}\hfill
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{img/baldock/bombus-lapidarius.jpeg}
\caption{\emph{Bombus lapidarius} or red-tailed bumblebee}
\end{subfigure}
\end{figure}
}
\only<2->{
\begin{columns}
\begin{column}{0.2\textwidth}
\begin{figure}
\onslide<3>{
\begin{subfigure}[t]{0.7\textwidth}
\centering
\includegraphics[width=1\textwidth]{img/baldock/bombus-hortorum.jpeg}
\caption{\emph{Bombus hortorum} or garden bumblebee}
\end{subfigure}
}
\onslide<4>{
\begin{subfigure}[t]{0.7\textwidth}
\centering
\includegraphics[width=1\textwidth]{img/baldock/bombus-lapidarius.jpeg}
\caption{\emph{Bombus lapidarius} or red-tailed bumblebee}
\end{subfigure}
}
\end{figure}
\end{column}
\begin{column}{0.7\textwidth}
\begin{figure}
\centering
\begin{tikzpicture}[every every node/.style={anchor=south west, inner sep=0pt}, x=1mm, y=1mm]
\node (struct) at (0,0) {\includegraphics[width=0.8\textwidth]{tikz/applications/baldock/shared-iid.pdf}};
\node (gen) at (-29,14.8) {};
\node (interm) at (-29,2.5) {};
\node (spe) at (-29,-9.8) {};
\node[left = 3mm of spe] (@aux) {\phantom{B, E, R}};
\node[isosceles triangle,
isosceles triangle apex angle=10,
draw,
rotate=270,
shading = axis,
top color=blue!50,
bottom color=blue!1!white,
anchor=right corner, minimum height=42mm, label={[label distance = 2mm]207:Generalists}, label={[label distance = 12mm]357:Specialists}] (T) at ($(struct.north east)+(-1,-2.5)$) {};
\only<3>{
\node[left = 3mm of gen] (towns_gen_garden) {B, L};
\node[left = 3mm of spe] (towns_spe_garden) {\phantom{B, }E, R};
\path (towns_gen_garden) edge[->,thick] (gen);
\path (towns_spe_garden) edge[->,thick] (spe);
}
\only<4>{
\node[left = 3mm of interm] (towns_interm_red) {L};
\node[left = 3mm of spe] (towns_spe_red) {B, E, R};
\path (towns_interm_red) edge[->,thick] (interm);
\path (towns_spe_red) edge[->,thick] (spe);
}
\end{tikzpicture}
\caption{Shared structure ($\alpha$ matrix) of the four networks}\label{fig:shared}
\end{figure}
\end{column}
\end{columns}
}
\end{frame}
\section{Conclusion}
\begin{frame}
\frametitle{Conclusion and perspectives}
\begin{block}{Capabilities}
\begin{itemize}
\item 4 models including 3 with flexibility on at least one of
the dimensions (adaptability to data).
\item Detect classic and less classic structures in an agnostic way.
\item Partition a set of networks according to their structures.
\end{itemize}
\end{block}
\begin{block}{Package and applications}
\begin{itemize}
\item Article in preparation
\item \texttt{R} package \texttt{colSBM} on
GitHub\footnote{\url{https://github.com/GrossSBM/colSBM}}
\item Apply clustering to data from
\cite{pichonTellingMutualisticAntagonistic2024,doreRelativeEffectsAnthropogenic2021}
to tell whether the interaction type drives the structure of the network.
\end{itemize}
\end{block}
\end{frame}