diff --git a/annexe.tex b/annexe.tex index 1c76223..55f0ebd 100644 --- a/annexe.tex +++ b/annexe.tex @@ -1,3 +1,16 @@ +\section{Clustering} + +\begin{frame}{Clustering algorithm} + \centering + \vspace{0.25\baselineskip} + \begin{tikzpicture}[scale=0.85] + \input{tikz/clustering.tex} + \end{tikzpicture} + \[ + D_{\mathcal{M}}(m,m') = \sum_{q = 1}^{Q_1} \sum_{r = 1}^{Q_2} \max(\widetilde{\pi}_{q}^{m}, \widetilde{\pi}_{q}^{m'}) \left( \widetilde{\alpha}_{qr}^{m} - \widetilde{\alpha}_{qr}^{m'}\right)^{2} \max(\widetilde{\rho}_{r}^{m}, \widetilde{\rho}_{r}^{m'}) + \] +\end{frame} + \section{VEM} \begin{frame}{Developed formula of variational EM} @@ -75,6 +88,7 @@ \end{align*} Thus $\ell(\bY;\theta) - \KL{\Ryt}{\log \Prob(\bZ,\bW|\bY;\theta)} = \mathcal{J}(\tau;\theta) \qed$ \end{frame} +\section{Model selection} \begin{frame} \frametitle{On the BIC-L} @@ -97,8 +111,6 @@ \] \end{frame} -\section{Model selection} - \begin{frame} \frametitle{Choice of $(Q_1,Q_2)$ - Greedy approach} \begin{columns} @@ -155,78 +167,4 @@ \end{block}} \end{column} \end{columns} -\end{frame} - -\section{Clustering} - -\begin{frame}{Clustering algorithm} - \centering - \vspace{0.25\baselineskip} - \begin{tikzpicture}[scale=0.85] - \input{tikz/clustering.tex} - \end{tikzpicture} - \[ - D_{\mathcal{M}}(m,m') = \sum_{q = 1}^{Q_1} \sum_{r = 1}^{Q_2} \max(\widetilde{\pi}_{q}^{m}, \widetilde{\pi}_{q}^{m'}) \left( \widetilde{\alpha}_{qr}^{m} - \widetilde{\alpha}_{qr}^{m'}\right)^{2} \max(\widetilde{\rho}_{r}^{m}, \widetilde{\rho}_{r}^{m'}) - \] -\end{frame} - -\section{Results~\cite{baldockSystemsApproachReveals2019,baldockDailyTemporalStructure2011}} -\begin{frame}[allowframebreaks] - \begin{figure}[ht] - \centering - \begin{subfigure}[t]{0.5\textwidth} - \centering - \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol.pdf} - \caption{Donnée} - \end{subfigure}\hfil - \begin{subfigure}[t]{0.5\textwidth} - \centering - 
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf} - \caption{Reordered} - \end{subfigure} - \caption{Bristol} - \end{figure} - - \begin{figure}[ht] - \centering - \begin{subfigure}[t]{0.5\textwidth} - \centering - \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh.pdf} - \caption{Donnée} - \end{subfigure}\hfil - \begin{subfigure}[t]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf} - \caption{Reordered} - \end{subfigure} - \caption{Edinburgh} - \end{figure} - - \begin{figure} - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds.pdf} - \caption{Donnée} - \end{subfigure}\hfil - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} - \caption{Réordonnée} - \end{subfigure} - \caption{Leeds} - \end{figure} - - \begin{figure} - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Reading.pdf} - \caption{Donnée} - \end{subfigure}\hfil - \begin{subfigure}[ht]{0.5\textwidth} - \centering - \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf} - \caption{Réordonnée} - \end{subfigure} - \caption{Reading} - \end{figure} \end{frame} \ No newline at end of file diff --git a/presentation.tex b/presentation.tex index 7b1363c..1c5c1e7 100644 --- a/presentation.tex +++ b/presentation.tex @@ -112,10 +112,10 @@ \subtitle{JdS 2025} -\title[Bipartite networks collection]{Joint analysis of bipartite networks collection} +\title[Bipartite networks collection]{Joint estimation of bipartite network collections. Application to plant-pollinator networks.} \author[L. 
Lacoste]{\underline{Louis Lacoste}, Pierre Barbillon and -Sophie Donnet\newline Laboratoire MIA Paris-Saclay\newline\ccbysa} -\date{\today} +Sophie Donnet\newline UMR MIA Paris-Saclay, AgroParisTech, INRAE, Université Paris-Saclay\newline\ccbysa} +\date{03 Juin 2025} \begin{document} diff --git a/principal.tex b/principal.tex index d486b26..3123a9b 100644 --- a/principal.tex +++ b/principal.tex @@ -56,14 +56,14 @@ \begin{column}{0.4\textwidth} \only<1>{ \begin{itemize} - \item A bipartite graph $G = (U,V,E)$ - \item Can be encoded by a bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$ + \item Bipartite graph $G = (U,V,E)$ + \item Encoded in bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$ \end{itemize}} \only<2>{ \begin{itemize} \item Increasingly available - \item Modeling of various interactions, here ecosystems - \item Structure necessary for: biodiversity monitoring, robustness, risk of collapse + \item Ecosystems described by their interactions + \item Functional structure for: biodiversity monitoring, robustness, risk of collapse \end{itemize}} \end{column} \end{columns} @@ -139,7 +139,7 @@ \begin{tikzpicture}[scale=0.35] \input{tikz/lbm.tex} \end{tikzpicture} - \caption{Example of LBM\footnotemark[\thefootnote]} + \caption{Example of BiSBM} \label{fig:LBMvisu} \end{figure} \end{column} @@ -192,8 +192,7 @@ \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf} \caption{Reading, $Q_1 = 3, Q_2 = 3$} \end{subfigure} - \vspace{-\baselineskip} - \caption{Reordered adjacency matrices, using BiSBM for each network} + \caption{Separate BiSBM fit for each network} \label{fig:adj-reord} \end{figure} } @@ -215,7 +214,7 @@ \onslide<2>{ \begin{block}{$\pi\rho$-colBiSBM} \[ \forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} - \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha) + \mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi\alert{^m}, \rho\alert{^m}, \alpha) \] with $\theta = 
((\pi\alert{^m})_{m=1,\dots, M}, (\rho\alert{^m})_{m=1,\dots, @@ -264,21 +263,20 @@ \label{sec:inference-and-model-selection} \begin{frame}{Parameter estimation}{How ?} \begin{align*} - \ell(\mathbf{Y};\theta) = & \sum_{m=1}^{M} \ell(Y^m;\theta) \\ - = & \sum_{m=1}^{M} \log \int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} dZ^m dW^m \\ - = & \sum_{m=1}^{M} \log\int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\ - & \ell(Z^m;\pi) + \ell(W^m;\rho)\} dZ^m dW^m + \ell(\mathbf{Y};\theta) = & \sum_{m=1}^{M} \ell(Y^m;\theta) \\ + = & \sum_{m=1}^{M} \log \sum_{\alert<2->{Z^m \in \mathcal{Z}^m,W^m\in\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} \\ + = & \sum_{m=1}^{M} \log\sum_{\alert<2->{Z^m \in \mathcal{Z}^m,W^m\in\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\ + & \ell(Z^m;\pi) + \ell(W^m;\rho)\} % & = \sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{q=1}^{Q_1} Z_{iq} \log(\pi_q) + \sum_{j=1}^{n_2^m}\sum_{r=1}^{Q_2} W_{jr} \log(\rho_r) \\ % & + \sum_{i,j}\sum_{q,r} Z_{iq}W_{jr} \log \mathcal{B}ern(Y_{ij};\alpha_{qr}) \end{align*} \onslide<3>{ - We would like to use Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is untractable due to dependence between row and column groups.} + EM impracticable since $\mathbf{Z,W|Y}$ intractable due to + conditional dependency.} \end{frame} \begin{frame}{Parameter estimation}{Solution} - By \emph{Variational EM}, as proposed - by~\cite{daudinMixtureModelRandom2008} and adapted for joint simple networks - by~\cite{chabert-liddellLearningCommonStructures2024}. + \emph{Variational EM}~\cite{daudinMixtureModelRandom2008,chabert-liddellLearningCommonStructures2024}. 
\begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$} $\mathcal{R}_{Y^m,\tau}(Z^m, W^m) = \mathcal{R}^1_{Y^m,\tau}(Z^m) @@ -298,26 +296,25 @@ \end{frame} \begin{frame}{Selection criterion for $Q_1, Q_2$} - \cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL): + Integrated Classification Likelihood (ICL)~\cite{biernackiAssessingMixtureModel2000} \begin{align*} \text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\ & = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2) - \end{align*} leads to low entropy clustering. Common in literature for SBM. + \end{align*} For SBM~\cite{daudinMixtureModelRandom2008}. \onslide<2->{ \begin{align*} - \text{BIC-L}(\bm{Y}, & Q_1, Q_2) = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H(\mathcal{R}_{\mathbf{Y},\hat{\tau}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ - & = \mathcal{J(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \textcolor{red}{\leq \log p(\mathbf{Y};\hat{\theta}^{\text{MV}})- \frac{1}{2}\text{pen}(Q_1, Q_2)} \\ + \text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H(\mathcal{R}_{\mathbf{Y},\hat{\tau}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ + & = \mathcal{J(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \end{align*} - because we want fuzzier clustering. } \end{frame} \begin{frame}{Practical problems of choosing $Q_1, Q_2$} \begin{alertblock}{Exploration problems} \begin{itemize} - \item Exploration of a 2D grid is costly. \uncover<2->{$\rightarrow$ \textbf{Greedy - approach} and \textbf{sliding window}} - \item Sensitivity to initializations. 
\uncover<3->{$\rightarrow$ \textbf{Spectral + \item Sensitivity to initializations. \uncover<2->{$\rightarrow$ \textbf{Spectral clustering} and \textbf{split \& merge} approach} + \item Exploration of a 2D grid is costly. \uncover<3->{$\rightarrow$ \textbf{Greedy + approach} and \textbf{sliding window}} \end{itemize} \end{alertblock} \end{frame} @@ -351,25 +348,25 @@ \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf} - \caption{Bristol, $Q_1 = 3, Q_2 = 5$} + \caption{Bristol} \end{subfigure}\hfil \begin{subfigure}[t]{0.5\textwidth} \centering \includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf} - \caption{Edinburgh, $Q_1 = 3, Q_2 = 5$} + \caption{Edinburgh} \end{subfigure} \newline \begin{subfigure}[ht]{0.5\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf} - \caption{Leeds, $Q_1 = 3, Q_2 = 5$} + \caption{Leeds} \end{subfigure}\hfil \begin{subfigure}[ht]{0.5\textwidth} \centering \includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf} - \caption{Reading, $Q_1 = 3, Q_2 = 5$} + \caption{Reading} \end{subfigure} - \caption{Reordered adjacency matrices by \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019}} + \caption{\emph{iid}-colBiSBM fit, $Q_1 = 3, Q_2 = 5$} \end{figure}} \end{frame} @@ -412,14 +409,14 @@ \begin{column}{0.2\textwidth} \begin{figure} - \onslide<3>{ + \onslide<2>{ \begin{subfigure}[t]{0.7\textwidth} \centering \includegraphics[width=1\textwidth]{img/baldock/bombus-hortorum.jpeg} \caption{\emph{Bombus Hortorum} or garden bumblebee} \end{subfigure} } - \onslide<4>{ + \onslide<3>{ \begin{subfigure}[t]{0.7\textwidth} \centering \includegraphics[width=1\textwidth]{img/baldock/bombus-lapidarius.jpeg} @@ -448,13 +445,13 @@ bottom color=blue!1!white, anchor=right corner, minimum 
height=42mm, label={[label distance = 2mm]207:Generalists}, label={[label distance = 12mm]357:Specialists}] (T) at ($(struct.north east)+(-1,-2.5)$) {}; - \only<3>{ + \only<2>{ \node[left = 3mm of gen] (towns_gen_garden) {B, L}; \node[left = 3mm of spe] (towns_spe_garden) {\phantom{B, }E, R}; \path (towns_gen_garden) edge[->,thick] (gen); \path (towns_spe_garden) edge[->,thick] (spe); } - \only<4>{ + \only<3>{ \node[left = 3mm of interm] (towns_interm_red) {L}; \node[left = 3mm of spe] (towns_spe_red) {B, E, R}; \path (towns_interm_red) edge[->,thick] (interm); @@ -471,22 +468,21 @@ \section{Conclusion} \begin{frame} \frametitle{Conclusion and perspectives} - \begin{block}{Capabilities} + \begin{block}{Summary} \begin{itemize} \item 4 models including 3 with flexibility on at least one of the dimensions (adaptability to data). - \item Detect classic and less classic structures in an agnostic way. - \item Partition a set of networks according to their structures. + \item Jointly detect classic and less classic structures agnostically. + \item Partition a collection into sub-collections with homogeneous structures. + \item \texttt{R} package \texttt{colSBM} at \url{https://github.com/GrossSBM/colSBM} \end{itemize} \end{block} - \begin{block}{Package and applications} + \begin{block}{Future work} \begin{itemize} \item Article in redaction - \item \texttt{R} package \texttt{colSBM} on - Github\footnote{\url{https://github.com/GrossSBM/colSBM}} \item Apply clustering to data from \cite{pichonTellingMutualisticAntagonistic2024,doreRelativeEffectsAnthropogenic2021} - to tell if interaction drives the structure of the network. + to tell if interaction types drive the structure of the network. \end{itemize} \end{block} \end{frame} \ No newline at end of file