diff --git a/annexe.tex b/annexe.tex index a73576a..1c76223 100644 --- a/annexe.tex +++ b/annexe.tex @@ -78,19 +78,23 @@ \begin{frame} \frametitle{On the BIC-L} - Raconter l'histoire dans l'ordre suivant : - \begin{itemize} - \item ICL = Méthode BIC (approx Laplace) sur la log complète, fait apparaître la - pénalité de complexité et pénalise l'entropie - \item ICLv = ICL mais avec les paramètres variationnels et l'entropie variationnelle - \item BIC-L = ICLv mais sans la pénalité sur l'entropie et la rajoutant à la fin - \end{itemize} + % Raconter l'histoire dans l'ordre suivant : + % \begin{itemize} + % \item ICL = Méthode BIC (approx Laplace) sur la log complète, fait apparaître la + % pénalité de complexité et pénalise l'entropie + % \item ICLv = ICL mais avec les paramètres variationnels et l'entropie variationnelle + % \item BIC-L = ICLv mais sans la pénalité sur l'entropie et la rajoutant à la fin + % \end{itemize} \begin{align*} - \text{BIC}(\hat{\theta}) & = \log p(\mathbf{Y};\hat{\theta}) - \frac{1}{2} \text{pen}(\dots) \\ - & = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\underbrace{\log p(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})}_{\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})}] + \mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) - \frac{1}{2} \text{pen}(\dots) \\ - \text{ICL}(\hat{\theta}) & = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})] - \frac{1}{2} \text{pen}(\dots) \\ - \text{BIC-L}(\hat{\theta}, \hat{\tau}) & = \Esp_{\mathcal{R}_{\mathbf{Y}, \hat{\tau}}}[\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y}, \hat{\tau}}) - \frac{1}{2} \text{pen}(\dots) \\ + % \text{BIC}(\hat{\theta}) & = \log p(\mathbf{Y};\hat{\theta}) - \frac{1}{2} \text{pen}(\dots) \\ + % & = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\underbrace{\log p(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})}_{\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})}] + 
\mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) - \frac{1}{2} \text{pen}(\dots) \\ + \text{ICL}(\hat{\theta}) & = \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}} [\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})] - \frac{1}{2} \text{pen}(\dots) \\ + \Esp_{\mathbf{Z}, \mathbf{W}|\mathbf{Y}}[\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta})] & = \log p(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) \\ + \text{And thus,}~\text{ICL}(\hat{\theta}) & = \log p(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z},\mathbf{W}|\mathbf{Y})) - \frac{1}{2} \text{pen}(\dots) \end{align*} + Recalling that the distribution of $\mathbf{Z,W|Y}$ is intractable, we use the \emph{variational approximation} $\mathcal{R}_{\mathbf{Y},\hat{\tau}}$ and, by not penalizing the entropy of this distribution, we derive the BIC-like criterion: + \[ \text{BIC-L}(\hat{\theta}, \hat{\tau})= \Esp_{\mathcal{R}_{\mathbf{Y}, \hat{\tau}}}[\ell_c(\mathbf{Y},\mathbf{Z},\mathbf{W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y}, \hat{\tau}}) - \frac{1}{2} \text{pen}(\dots) + \] \end{frame} \section{Model selection} diff --git a/principal.tex b/principal.tex index 153408a..c1ffb78 100644 --- a/principal.tex +++ b/principal.tex @@ -3,113 +3,72 @@ \begin{frame} \frametitle{Why a network?} \begin{columns} - \begin{column}{0.5\textwidth} + \begin{column}{0.55\textwidth} \begin{columns} - \begin{column}{0.5\textwidth} - \begin{figure}[ht] + \only<1>{ + + \begin{column}{0.5\textwidth} + + \begin{figure}[ht] + \centering + \begin{tikzpicture}[scale=0.65] + \input{tikz/plantpollinatornetwork.tex} + \end{tikzpicture} + \caption{A toy network} + \label{fig:plants-pollin} + \end{figure} + \end{column} + \begin{column}{0.3\textwidth} \centering - \begin{tikzpicture}[scale=.6,rotate=270] - \input{tikz/plantpollinatornetwork.tex} - \end{tikzpicture} - \caption{Example of a network} - \label{fig:plants-pollin} - \end{figure} - \end{column} - \begin{column}{0.3\textwidth} - \centering - \begin{align*} 
- \begin{pmatrix} - 1 & 0 & 1 \\ - 1 & 0 & 0 \\ - 1 & 0 & 0 \\ - 1 & 1 & 0 - \end{pmatrix} - \end{align*} - \footnotesize - Associated bi-adjacency matrix - \end{column} + \begin{align*} + \begin{pmatrix} + 1 & 0 & 1 \\ + 1 & 0 & 0 \\ + 1 & 0 & 0 \\ + 1 & 1 & 0 + \end{pmatrix} + \end{align*} + \footnotesize + Associated bi-adjacency matrix + + \end{column} + } + \only<2>{ + \begin{column}{0.5\textwidth} + \begin{figure}[ht] + % \centering + \includegraphics[width=1\textwidth]{tikz/applications/baldock/graph-Baldock2019_Bristol.pdf} + \caption{Plant-pollinator network from + Bristol\newline\cite{baldockSystemsApproachReveals2019}} + \label{fig:bristol-network} + \end{figure} + \end{column} + \begin{column}{0.45\textwidth} + \centering + \begin{figure} + \includegraphics[width=0.7\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol.pdf} + \caption{Adjacency matrix of the network} + \end{figure} + \end{column} + } \end{columns} - \begin{figure}[ht] - \centering - \includegraphics[width=0.7\textwidth]{tikz/applications/baldock/graph-Baldock2019_Bristol.pdf} - \caption{Plant-pollinator network from - Bristol\newline\cite{baldockSystemsApproachReveals2019}} - \label{fig:label} - \end{figure} \end{column} - \begin{column}{0.5\textwidth} - \begin{itemize} - \item Increasingly available - \item Modeling of various interactions, here ecosystems - \item Structure necessary for: biodiversity monitoring, robustness, risk of collapse - \end{itemize} + \begin{column}{0.4\textwidth} + \only<1>{ + \begin{itemize} + \item A bipartite graph $G = (U,V,E)$ + \item Can be encoded by a bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$ + \end{itemize}} + \only<2>{ + \begin{itemize} + \item Increasingly available + \item Modeling of various interactions, here ecosystems + \item Structure necessary for: biodiversity monitoring, robustness, risk of collapse + \end{itemize}} \end{column} \end{columns} \end{frame} -\begin{frame}{Analysis methods for a network} - Several methods~: - 
\begin{itemize} - \item Metrics at \begin{itemize} - \item node level: degree, centrality\dots - \item network level: density, nestedness\dots - \end{itemize} - \cite{kolaczykStatisticalAnalysisNetwork2009} - \item \textbf<2>{Node embedding and/or clustering with latent variable models} - \\\cite{snijdersEstimationPredictionStochastic1997,hoffLatentSpaceApproaches2002} - \item Node or network embedding with Graph Convolutional Networks - \\\cite{kipfVariationalGraphAutoEncoders2016a} - \end{itemize} -\end{frame} - -\begin{frame} - \addtocounter{footnote}{1} - \frametitle{Bipartite Stochastic Block Model (BiSBM\footnotemark[\thefootnote])}\framesubtitle{\cite{govaertEMAlgorithmBlock2005}} - \begin{columns} - \begin{column}{0.40\linewidth} - \begin{figure}[H] - \center - \begin{tikzpicture}[scale=0.35] - \input{tikz/lbm.tex} - \end{tikzpicture} - \caption{Example of LBM\footnotemark[\thefootnote]} - \label{fig:LBMvisu} - \end{figure} - \end{column} - \only<1>{ - \begin{column}{0.51\linewidth} - \begin{block}{Hierarchical model} - \vspace{-\baselineskip} - \begin{align*} - \forall q\in[\![ 1, Q_1]\!],~ & \mathbb{P}(Z_i = q) = \pi_q \\ - \forall r\in[\![ 1, Q_2]\!],~ & \mathbb{P}(W_j = r) = \rho_r \\ - & Y_{ij} | Z_i, W_j \sim \mathcal{F}(\alpha_{Z_i,W_j}) - \end{align*} - where $|\pi| = Q_1, |\rho| = Q_2, |\alpha| = Q_1 \times Q_2$ - \end{block} - \begin{block}{Concise LBM formula} - $Y \sim \mathcal{F}\text{-BiSBM}_{n_1,n_2}(Q_1, Q_2, \pi, \rho, \alpha)$ - \end{block} - \end{column}} - \only<2>{ - \begin{column}{0.51\linewidth} - With \begin{itemize} - \item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ fixed row blocks - \item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{peach}\bullet}\}|$ fixed column blocks - \end{itemize} - \begin{block}{Parameters} - \begin{itemize} - \item $\pi_{{\color{blueind}\bullet}} = \mathbb{P}(Z_i = {\color{blueind}\bullet})$ - \item 
$\rho_{{\color{burntorange}\bullet}} = \mathbb{P}(W_j = {\color{burntorange}\bullet})$ - \item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(Y_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$ - \end{itemize} - \end{block} - \end{column}} - \end{columns} - - \footnotetext[\thefootnote]{Commonly Known as \emph{Latent Block Model} (LBM) in the literature.} -\end{frame} - \begin{frame} \frametitle{Multiple networks} \only<1>{\begin{figure}[ht] @@ -155,13 +114,59 @@ \end{figure}} \end{frame} -\section[Bipartite collection models]{Bipartite network collection models} -\label{sec:extension-of-colsbm-to-bipartite-networks} +\begin{frame}{Analysis methods for a network} + Several methods~: + \begin{itemize} + \item Metrics at \begin{itemize} + \item node level: degree, centrality\dots + \item network level: density, nestedness\dots + \end{itemize} + \cite{kolaczykStatisticalAnalysisNetwork2009} + \item \textbf<2>{Node embedding and/or clustering with latent variable models} + \\\cite{snijdersEstimationPredictionStochastic1997,hoffLatentSpaceApproaches2002} + \item Node or network embedding with Graph Convolutional Networks + \\\cite{kipfVariationalGraphAutoEncoders2016a} + \end{itemize} +\end{frame} + +\begin{frame} + \addtocounter{footnote}{1} + \frametitle{Bipartite Stochastic Block Model (BiSBM\footnotemark[\thefootnote])}\framesubtitle{\cite{govaertEMAlgorithmBlock2005}} + \begin{columns} + \begin{column}{0.40\linewidth} + \begin{figure}[H] + \center + \begin{tikzpicture}[scale=0.35] + \input{tikz/lbm.tex} + \end{tikzpicture} + \caption{Example of LBM\footnotemark[\thefootnote]} + \label{fig:LBMvisu} + \end{figure} + \end{column} + \begin{column}{0.51\linewidth} + \begin{block}{Hierarchical model} + \vspace{-\baselineskip} + \begin{align*} + \forall q\in[\![ 1, Q_1]\!],\mathbb{P}(Z_i = q) = \pi_q \\ + \forall r\in[\![ 1, Q_2]\!],\mathbb{P}(W_j = r) = \rho_r \\ + Y_{ij} | Z_i = q, W_j = r \sim 
\mathcal{B}ern(\alpha_{q,r}) + \end{align*} + where $|\pi| = Q_1, |\rho| = Q_2, |\alpha| = Q_1 \times Q_2$ + \end{block} + \begin{block}{Concise BiSBM formula} + $Y \sim \mathcal{B}ern\text{-BiSBM}_{n_1,n_2}(Q_1, Q_2, \pi, \rho, \alpha)$ + \end{block} + \end{column} + \end{columns} + + \footnotetext[\thefootnote]{Commonly Known as \emph{Latent Block Model} (LBM) in the literature.} +\end{frame} + \begin{frame} \frametitle{Model 0: sep-BiSBM} \only<1-2>{ \begin{equation*} - \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2->{^m}, Q_2\alert<2->{^m}, \pi\alert<2->{^m}, \rho\alert<2->{^m}, \alpha\alert<2->{^m}) + \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1\alert<2->{^m}, Q_2\alert<2->{^m}, \pi\alert<2->{^m}, \rho\alert<2->{^m}, \alpha\alert<2->{^m}) \end{equation*}} \only<3>{ \begin{figure}[ht] @@ -169,23 +174,23 @@ \begin{subfigure}[ht]{0.42\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf} - \caption{Bristol} + \caption{Bristol, $Q_1 = 3, Q_2 = 3$} \end{subfigure} \begin{subfigure}[ht]{0.42\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf} - \caption{Edinburgh} + \caption{Edinburgh, $Q_1 = 3, Q_2 = 3$} \end{subfigure} \hfill \begin{subfigure}[ht]{0.42\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf} - \caption{Leeds} + \caption{Leeds, $Q_1 = 3, Q_2 = 2$} \end{subfigure} \begin{subfigure}[ht]{0.42\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf} - \caption{Reading} + \caption{Reading, $Q_1 = 3, Q_2 = 3$} \end{subfigure} \vspace{-\baselineskip} \caption{Reordered adjacency matrices, using BiSBM for each network} @@ -194,12 +199,15 @@ } \end{frame} +\section[Bipartite collection 
models]{Bipartite network collection models} +\label{sec:extension-of-colsbm-to-bipartite-networks} + \begin{frame} \frametitle{Several joint models} \onslide<1->{ \begin{block}{\emph{iid}-colBiSBM} \[ \forall m \in \{1\dots M\}, Y^m \overset{iid}{\sim} - \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi, \rho, \alpha) + \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi, \rho, \alpha) \] with $\theta = (\pi, \rho, \alpha)$. @@ -207,13 +215,12 @@ \onslide<2>{ \begin{block}{$\pi\rho$-colBiSBM} \[ \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} - \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha) + \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha) \] with $\theta = ((\pi\alert{^m})_{m=1,\dots, M}, (\rho\alert{^m})_{m=1,\dots, M}, \alpha)$. \end{block} - And intermediate models freeing $\pi$ or $\rho$. } \end{frame} % \begin{frame} @@ -251,7 +258,20 @@ % \end{frame} \section{Inference and model selection} \label{sec:inference-and-model-selection} -\begin{frame}{Parameter estimation} +\begin{frame}{Parameter estimation}{How?} + \begin{align*} + \ell(\mathbf{Y};\theta) = & \sum_{m=1}^{M} \ell(Y^m;\theta) \\ + = & \sum_{m=1}^{M} \log \int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} dZ^m dW^m \\ + = & \sum_{m=1}^{M} \log\int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\ + & \ell(Z^m;\pi) + \ell(W^m;\rho)\} dZ^m dW^m + % & = \sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{q=1}^{Q_1} Z_{iq} \log(\pi_q) + \sum_{j=1}^{n_2^m}\sum_{r=1}^{Q_2} W_{jr} \log(\rho_r) \\ + % & + \sum_{i,j}\sum_{q,r} Z_{iq}W_{jr} \log \mathcal{B}ern(Y_{ij};\alpha_{qr}) + \end{align*} + + \onslide<3>{ + We would like to use the Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977}, but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to the dependence between rows and columns.} +\end{frame} +\begin{frame}{Parameter estimation}{Solution} 
By \emph{Variational EM}, as proposed by~\cite{daudinMixtureModelRandom2008, chabert-liddellLearningCommonStructures2024}. @@ -259,31 +279,35 @@ $\mathcal{R}_{Y^m,\tau}(Z^m, W^m) = \mathcal{R}^1_{Y^m,\tau}(Z^m) {\color{red}\times} - \mathcal{R}^2_{Y^m,\tau}(W^m) \Rightarrow$ independence between rows and columns. + \mathcal{R}^2_{Y^m,\tau}(W^m) \Rightarrow$ independence between rows and columns, mean field approximation. \end{block} \begin{multline*} \ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg( - \color{black} \mathcal{Q}^m(\theta\mid\theta^{(t)}) + + \color{black} \mathbb{E}_{\mathcal{R}_{Y^m,\tau}(Z^m,W^m)} + \left[ \ell_c(Y^m,Z^m,W^m ; \theta^{(t)}) \right] + \\ \mathcal{H}(\mathcal{R}_{Y^m,\theta^{(t)}} (Z^m, W^m)) \color{red}\bigg) \color{black} - \eqcolon \mathcal{J}(\tau;\theta) + \eqcolon \mathcal{J}(\mathcal{R}_{\mathbf{Y},\tau};\theta) \end{multline*} - where $\mathcal{Q}^m(\theta\mid\theta^{(t)}) = - \mathbb{E}_{Z^m,W^m - \sim \mathcal{R}_{Y^m,\tau}(.)} - \left[ \ell_c(Y^m,Z^m,W^m | \theta) \right] \,$ + where $\theta = (\pi, \rho, \alpha)$ for \emph{iid}-colBiSBM \end{frame} -\begin{frame} - \frametitle{Problem of choosing $(Q_1, Q_2)$} - Need to select $Q_1$ and $Q_2$. BIC-Like criterion\footnote{ICL + entropy - penalty} - +\begin{frame}{Selection criterion for $Q_1, Q_2$} + \cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL). 
\begin{align*} - \text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H(\mathcal{R}_{\mathbf{Y},\hat{\tau}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ - & = \mathcal{J(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) - \end{align*} - + \text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\ + & = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2) + \end{align*} Penalizing the entropy leads to low-entropy clusterings. + \onslide<2->{ + \begin{align*} + \text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ + & = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}; \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) + \end{align*} + because we want fuzzier clustering. + } +\end{frame} +\begin{frame}{Practical problems of choosing $Q_1, Q_2$} \begin{alertblock}{Exploration problems} \begin{itemize} \item Exploration of a 2D grid is costly. 
\uncover<2->{$\rightarrow$ \textbf{Greedy @@ -299,47 +323,66 @@ \begin{frame} \frametitle{Results~\cite{baldockSystemsApproachReveals2019}} - \begin{figure}[ht] - \centering - \begin{subfigure}[t]{0.5\textwidth} + \only<1>{ + \begin{figure}[ht] \centering - \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf} - \caption{Bristol} - \end{subfigure}\hfil - \begin{subfigure}[t]{0.5\textwidth} + \begin{tikzpicture}[every node/.style={anchor=south west, inner sep=0pt}, x=1mm, y=1mm] + \node (struct) at (0,0) {\includegraphics[width=0.8\textwidth]{tikz/applications/baldock/shared-mixture-iid.pdf}}; + \node[isosceles triangle, + isosceles triangle apex angle=10, + draw, + rotate=270, + shading = axis, + top color=blue!50, + bottom color=blue!1!white, + anchor=right corner, minimum height=25mm, label={[label distance = 2mm]180:Generalists}, label={[label distance = 2mm]0:Specialists}] (T) at ($(struct.east)+(1.25,8)$) {}; + + \end{tikzpicture} + \caption{Shared structure ($\alpha$ matrix) and proportions ($\pi$ and $\rho$) of the four networks} + \label{fig:shared-mixture} + \end{figure}} + + \only<2>{\begin{figure}[ht] \centering - \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf} - \caption{Edinburgh} - \end{subfigure} - \newline - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} - \caption{Leeds} - \end{subfigure}\hfil - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf} - \caption{Reading} - \end{subfigure} - \caption{Reordered adjacency matrices by \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019}} - \end{figure} + \begin{subfigure}[t]{0.5\textwidth} + \centering + 
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf} + \caption{Bristol, $Q_1 = 3, Q_2 = 5$} + \end{subfigure}\hfil + \begin{subfigure}[t]{0.5\textwidth} + \centering + \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf} + \caption{Edinburgh, $Q_1 = 3, Q_2 = 5$} + \end{subfigure} + \newline + \begin{subfigure}[ht]{0.5\textwidth} + \centering + \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} + \caption{Leeds, $Q_1 = 3, Q_2 = 5$} + \end{subfigure}\hfil + \begin{subfigure}[ht]{0.5\textwidth} + \centering + \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf} + \caption{Reading, $Q_1 = 3, Q_2 = 5$} + \end{subfigure} + \caption{Reordered adjacency matrices by \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019}} + \end{figure}} \end{frame} \begin{frame} - \frametitle{Results~\cite{baldockSystemsApproachReveals2019} focus on Leeds} + \frametitle{Focus on Leeds} \captionsetup{font=normalsize} \begin{figure}[ht] \centering \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf} - \caption{Leeds with sep-BiSBM} + \caption{sep-BiSBM, $Q_1 = 3, Q_2 = 2$} \end{subfigure}\hfill \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=1\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} - \caption{Leeds with \emph{iid}-colBiSBM} + \caption{\emph{iid}-colBiSBM, $Q_1 = 3, Q_2 = 5$} \end{subfigure} \end{figure} \end{frame} @@ -421,19 +464,7 @@ } \end{frame} -\section{Extension and conclusion} - -\begin{frame}[allowframebreaks] - \frametitle{Application to~\cite{baldockDailyTemporalStructure2011, - baldockSystemsApproachReveals2019}} - TODO Put $\alpha$ plots and tree structure of partition - \begin{figure}[t] - \centering - 
\includegraphics[scale=0.1]{backup-app-iid-struct1.png} - \includegraphics[scale=0.2]{backup-app-iid-struct2.png} - \caption{Model $iid$, separate Kenyan (left) and British (right) networks} - \end{figure} -\end{frame} +\section{Conclusion} \begin{frame} \frametitle{Conclusion and perspectives} \begin{block}{Capabilities} @@ -446,12 +477,12 @@ \end{block} \begin{block}{Package and applications} \begin{itemize} - \item \texttt{ArXiv} preprint in redaction - \item \texttt{CRAN} submission - \item Integrate the possibility of an additional criterion for clustering (e.g. - urbanization gradient~\cite{fisogniSeasonalTrajectoriesPlantpollinator2022}) + \item Article in preparation + \item \texttt{R} package \texttt{colSBM} on + GitHub\footnote{\url{https://github.com/GrossSBM/colSBM}} \item Apply clustering to data from \cite{pichonTellingMutualisticAntagonistic2024,doreRelativeEffectsAnthropogenic2021} + to tell whether the interaction drives the structure of the network. \end{itemize} \end{block} \end{frame} \ No newline at end of file diff --git a/references.bib b/references.bib index d01d845..b85774d 100644 --- a/references.bib +++ b/references.bib @@ -297,6 +297,26 @@ file = {/home/louis/snap/zotero-snap/common/Zotero/storage/439HK27B/Daudin et al. - 2008 - A mixture model for random graphs.pdf;/home/louis/snap/zotero-snap/common/Zotero/storage/HVVF5MNY/daudin2007.pdf.pdf} } +@article{dempsterMaximumLikelihoodIncomplete1977, + title = {Maximum {{Likelihood}} from {{Incomplete Data}} via the {{EM Algorithm}}}, + author = {Dempster, A. P. and Laird, N. M. and Rubin, D. B.}, + date = {1977}, + journaltitle = {Journal of the Royal Statistical Society. 
Series B (Methodological)}, + volume = {39}, + number = {1}, + eprint = {2984875}, + eprinttype = {jstor}, + pages = {1--38}, + publisher = {[Royal Statistical Society, Oxford University Press]}, + issn = {0035-9246}, + url = {https://www.jstor.org/stable/2984875}, + urldate = {2025-05-27}, + abstract = {A broadly applicable algorithm for computing maximum likelihood estimates from incomplete data is presented at various levels of generality. Theory showing the monotone behaviour of the likelihood and convergence of the algorithm is derived. Many examples are sketched, including missing value situations, applications to grouped, censored or truncated data, finite mixture models, variance component estimation, hyperparameter estimation, iteratively reweighted least squares and factor analysis.}, + keywords = {/unread}, + annotation = {Read\_Status: New\\ +Read\_Status\_Date: 2025-05-27T16:20:41.925Z} +} + @article{desjardins-proulxEcologicalInteractionsNetflix2017, title = {Ecological Interactions and the {{Netflix}} Problem}, author = {Desjardins-Proulx, Philippe and Laigle, Idaline and Poisot, Timothée and Gravel, Dominique}, diff --git a/tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf b/tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf index bc1a35d..2e4d978 100644 Binary files a/tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf and b/tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol.pdf differ diff --git a/tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf b/tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf index c5c57b9..d7d43d6 100644 Binary files a/tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf and b/tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh.pdf differ diff --git a/tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf b/tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf index 02abd3b..5db7ae9 100644 Binary files 
a/tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf and b/tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds.pdf differ diff --git a/tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf b/tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf index 465121c..57d4284 100644 Binary files a/tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf and b/tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf differ diff --git a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf index 419852f..a35a1d5 100644 Binary files a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf and b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf differ diff --git a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf index 59eb8db..3d1094f 100644 Binary files a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf and b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf differ diff --git a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf index 0cfa2e7..e378f0c 100644 Binary files a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf and b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf differ diff --git a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf index 9029443..ab2e223 100644 Binary files a/tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf and b/tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf differ diff --git a/tikz/applications/baldock/shared-iid.pdf b/tikz/applications/baldock/shared-iid.pdf index bd4349b..01bf757 100644 Binary files a/tikz/applications/baldock/shared-iid.pdf and b/tikz/applications/baldock/shared-iid.pdf differ diff --git 
a/tikz/applications/baldock/shared-mixture-iid.pdf b/tikz/applications/baldock/shared-mixture-iid.pdf new file mode 100644 index 0000000..316c8a4 Binary files /dev/null and b/tikz/applications/baldock/shared-mixture-iid.pdf differ