Retours Pierre

This commit is contained in:
Louis 2025-05-28 18:14:12 +02:00
parent 5499e4467d
commit 42bba3c77e
3 changed files with 53 additions and 119 deletions

View file

@ -1,3 +1,16 @@
\section{Clustering}
\begin{frame}{Clustering algorithm}
\centering
\vspace{0.25\baselineskip}
\begin{tikzpicture}[scale=0.85]
\input{tikz/clustering.tex}
\end{tikzpicture}
\[
D_{\mathcal{M}}(m,m') = \sum_{q = 1}^{Q_1} \sum_{r = 1}^{Q_2} \max(\widetilde{\pi}_{q}^{m}, \widetilde{\pi}_{q}^{m'}) \left( \widetilde{\alpha}_{qr}^{m} - \widetilde{\alpha}_{qr}^{m'}\right)^{2} \max(\widetilde{\rho}_{r}^{m}, \widetilde{\rho}_{r}^{m'})
\]
\end{frame}
\section{VEM}
\begin{frame}{Developed formula of variational EM}
@ -75,6 +88,7 @@
\end{align*}
Thus $\ell(\bY;\theta) - \KL{\Ryt}{\log \Prob(\bZ,\bW|\bY;\theta)} = \mathcal{J}(\tau;\theta) \qed$
\end{frame}
\section{Model selection}
\begin{frame}
\frametitle{On the BIC-L}
@ -97,8 +111,6 @@
\]
\end{frame}
\section{Model selection}
\begin{frame}
\frametitle{Choice of $(Q_1,Q_2)$ - Greedy approach}
\begin{columns}
@ -156,77 +168,3 @@
\end{column}
\end{columns}
\end{frame}
\section{Clustering}
\begin{frame}{Clustering algorithm}
\centering
\vspace{0.25\baselineskip}
\begin{tikzpicture}[scale=0.85]
\input{tikz/clustering.tex}
\end{tikzpicture}
\[
D_{\mathcal{M}}(m,m') = \sum_{q = 1}^{Q_1} \sum_{r = 1}^{Q_2} \max(\widetilde{\pi}_{q}^{m}, \widetilde{\pi}_{q}^{m'}) \left( \widetilde{\alpha}_{qr}^{m} - \widetilde{\alpha}_{qr}^{m'}\right)^{2} \max(\widetilde{\rho}_{r}^{m}, \widetilde{\rho}_{r}^{m'})
\]
\end{frame}
\section{Results~\cite{baldockSystemsApproachReveals2019,baldockDailyTemporalStructure2011}}
\begin{frame}[allowframebreaks]
\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol.pdf}
\caption{Donnée}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf}
\caption{Reordered}
\end{subfigure}
\caption{Bristol}
\end{figure}
\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh.pdf}
\caption{Donnée}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf}
\caption{Reordered}
\end{subfigure}
\caption{Edinburgh}
\end{figure}
\begin{figure}
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds.pdf}
\caption{Donnée}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf}
\caption{Réordonnée}
\end{subfigure}
\caption{Leeds}
\end{figure}
\begin{figure}
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Reading.pdf}
\caption{Donnée}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf}
\caption{Réordonnée}
\end{subfigure}
\caption{Reading}
\end{figure}
\end{frame}

View file

@ -112,10 +112,10 @@
\subtitle{JdS 2025}
\title[Bipartite networks collection]{Joint analysis of bipartite networks collection}
\title[Bipartite networks collection]{Joint estimation of bipartite network collections. Application to plant-pollinator networks.}
\author[L. Lacoste]{\underline{Louis Lacoste}, Pierre Barbillon and
Sophie Donnet\newline Laboratoire MIA Paris-Saclay\newline\ccbysa}
\date{\today}
Sophie Donnet\newline UMR MIA Paris-Saclay, AgroParisTech, INRAE, Université Paris-Saclay\newline\ccbysa}
\date{3 June 2025}
\begin{document}

View file

@ -56,14 +56,14 @@
\begin{column}{0.4\textwidth}
\only<1>{
\begin{itemize}
\item A bipartite graph $G = (U,V,E)$
\item Can be encoded by a bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$
\item Bipartite graph $G = (U,V,E)$
\item Encoded in bi-adjacency matrix $Y \in \{0,1\}^{n_1 \times n_2}$
\end{itemize}}
\only<2>{
\begin{itemize}
\item Increasingly available
\item Modeling of various interactions, here ecosystems
\item Structure necessary for: biodiversity monitoring, robustness, risk of collapse
\item Ecosystems described by their interactions
\item Functional structure for: biodiversity monitoring, robustness, risk of collapse
\end{itemize}}
\end{column}
\end{columns}
@ -139,7 +139,7 @@
\begin{tikzpicture}[scale=0.35]
\input{tikz/lbm.tex}
\end{tikzpicture}
\caption{Example of LBM\footnotemark[\thefootnote]}
\caption{Example of BiSBM}
\label{fig:LBMvisu}
\end{figure}
\end{column}
@ -192,8 +192,7 @@
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Reading.pdf}
\caption{Reading, $Q_1 = 3, Q_2 = 3$}
\end{subfigure}
\vspace{-\baselineskip}
\caption{Reordered adjacency matrices, using BiSBM for each network}
\caption{Separate BiSBM fit for each network}
\label{fig:adj-reord}
\end{figure}
}
@ -215,7 +214,7 @@
\onslide<2>{ \begin{block}{$\pi\rho$-colBiSBM}
\[
\forall m \in \{1,\dots,M\}, Y^m \overset{\text{ind}}{\sim}
\mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha)
\mathcal{B}ern\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi\alert{^m}, \rho\alert{^m}, \alpha)
\]
with $\theta = ((\pi\alert{^m})_{m=1,\dots, M}, (\rho\alert{^m})_{m=1,\dots,
@ -265,20 +264,19 @@
\begin{frame}{Parameter estimation}{How?}
\begin{align*}
\ell(\mathbf{Y};\theta) = & \sum_{m=1}^{M} \ell(Y^m;\theta) \\
= & \sum_{m=1}^{M} \log \int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} dZ^m dW^m \\
= & \sum_{m=1}^{M} \log\int_{\alert<2->{\mathcal{Z}^m\times\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\
& \ell(Z^m;\pi) + \ell(W^m;\rho)\} dZ^m dW^m
= & \sum_{m=1}^{M} \log \sum_{\alert<2->{Z^m \in \mathcal{Z}^m,W^m\in\mathcal{W}^m}} \exp\{\ell_c(Y^m,Z^m,W^m;\theta)\} \\
= & \sum_{m=1}^{M} \log\sum_{\alert<2->{Z^m \in \mathcal{Z}^m,W^m\in\mathcal{W}^m}}\exp\{\ell(Y^m | Z^m,W^m;\alpha) + \\
& \ell(Z^m;\pi) + \ell(W^m;\rho)\}
% & = \sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{q=1}^{Q_1} Z_{iq} \log(\pi_q) + \sum_{j=1}^{n_2^m}\sum_{r=1}^{Q_2} W_{jr} \log(\rho_r) \\
% & + \sum_{i,j}\sum_{q,r} Z_{iq}W_{jr} \log \mathcal{B}ern(Y_{ij};\alpha_{qr})
\end{align*}
\onslide<3>{
We would like to use the Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to the dependence between row and column groups.}
EM impracticable since $\mathbf{Z,W|Y}$ intractable due to
conditional dependency.}
\end{frame}
\begin{frame}{Parameter estimation}{Solution}
By \emph{Variational EM}, as proposed
by~\cite{daudinMixtureModelRandom2008} and adapted for joint simple networks
by~\cite{chabert-liddellLearningCommonStructures2024}.
\emph{Variational EM}~\cite{daudinMixtureModelRandom2008,chabert-liddellLearningCommonStructures2024}.
\begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$}
$\mathcal{R}_{Y^m,\tau}(Z^m, W^m) =
\mathcal{R}^1_{Y^m,\tau}(Z^m)
@ -298,26 +296,25 @@
\end{frame}
\begin{frame}{Selection criterion for $Q_1, Q_2$}
\cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL):
Integrated Classification Likelihood (ICL)~\cite{biernackiAssessingMixtureModel2000}
\begin{align*}
\text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2)
\end{align*} leads to low entropy clustering. Common in literature for SBM.
\end{align*} For SBM~\cite{daudinMixtureModelRandom2008}.
\onslide<2->{
\begin{align*}
\text{BIC-L}(\bm{Y}, & Q_1, Q_2) = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H(\mathcal{R}_{\mathbf{Y},\hat{\tau}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \mathcal{J(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \textcolor{red}{\leq \log p(\mathbf{Y};\hat{\theta}^{\text{MV}})- \frac{1}{2}\text{pen}(Q_1, Q_2)} \\
\text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2)
\end{align*}
because we want fuzzier clustering.
}
\end{frame}
\begin{frame}{Practical problems of choosing $Q_1, Q_2$}
\begin{alertblock}{Exploration problems}
\begin{itemize}
\item Exploration of a 2D grid is costly. \uncover<2->{$\rightarrow$ \textbf{Greedy
approach} and \textbf{sliding window}}
\item Sensitivity to initializations. \uncover<3->{$\rightarrow$ \textbf{Spectral
\item Sensitivity to initializations. \uncover<2->{$\rightarrow$ \textbf{Spectral
clustering} and \textbf{split \& merge} approach}
\item Exploration of a 2D grid is costly. \uncover<3->{$\rightarrow$ \textbf{Greedy
approach} and \textbf{sliding window}}
\end{itemize}
\end{alertblock}
\end{frame}
@ -351,25 +348,25 @@
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol.pdf}
\caption{Bristol, $Q_1 = 3, Q_2 = 5$}
\caption{Bristol}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includegraphics[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh.pdf}
\caption{Edinburgh, $Q_1 = 3, Q_2 = 5$}
\caption{Edinburgh}
\end{subfigure}
\newline
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds.pdf}
\caption{Leeds, $Q_1 = 3, Q_2 = 5$}
\caption{Leeds}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includegraphics[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading.pdf}
\caption{Reading, $Q_1 = 3, Q_2 = 5$}
\caption{Reading}
\end{subfigure}
\caption{Reordered adjacency matrices by \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019}}
\caption{\emph{iid}-colBiSBM fit, $Q_1 = 3, Q_2 = 5$}
\end{figure}}
\end{frame}
@ -412,14 +409,14 @@
\begin{column}{0.2\textwidth}
\begin{figure}
\onslide<3>{
\onslide<2>{
\begin{subfigure}[t]{0.7\textwidth}
\centering
\includegraphics[width=1\textwidth]{img/baldock/bombus-hortorum.jpeg}
\caption{\emph{Bombus hortorum} or garden bumblebee}
\end{subfigure}
}
\onslide<4>{
\onslide<3>{
\begin{subfigure}[t]{0.7\textwidth}
\centering
\includegraphics[width=1\textwidth]{img/baldock/bombus-lapidarius.jpeg}
@ -448,13 +445,13 @@
bottom color=blue!1!white,
anchor=right corner, minimum height=42mm, label={[label distance = 2mm]207:Generalists}, label={[label distance = 12mm]357:Specialists}] (T) at ($(struct.north east)+(-1,-2.5)$) {};
\only<3>{
\only<2>{
\node[left = 3mm of gen] (towns_gen_garden) {B, L};
\node[left = 3mm of spe] (towns_spe_garden) {\phantom{B, }E, R};
\path (towns_gen_garden) edge[->,thick] (gen);
\path (towns_spe_garden) edge[->,thick] (spe);
}
\only<4>{
\only<3>{
\node[left = 3mm of interm] (towns_interm_red) {L};
\node[left = 3mm of spe] (towns_spe_red) {B, E, R};
\path (towns_interm_red) edge[->,thick] (interm);
@ -471,22 +468,21 @@
\section{Conclusion}
\begin{frame}
\frametitle{Conclusion and perspectives}
\begin{block}{Capabilities}
\begin{block}{Summary}
\begin{itemize}
\item 4 models including 3 with flexibility on at least one of
the dimensions (adaptability to data).
\item Detect classic and less classic structures in an agnostic way.
\item Partition a set of networks according to their structures.
\item Jointly detect classic and less classic structures agnostically.
\item Partition a collection into sub-collections with homogeneous structures.
\item \texttt{R} package \texttt{colSBM} at \url{https://github.com/GrossSBM/colSBM}
\end{itemize}
\end{block}
\begin{block}{Package and applications}
\begin{block}{Future work}
\begin{itemize}
\item Article in preparation
\item \texttt{R} package \texttt{colSBM} on
Github\footnote{\url{https://github.com/GrossSBM/colSBM}}
\item Apply clustering to data from
\cite{pichonTellingMutualisticAntagonistic2024,doreRelativeEffectsAnthropogenic2021}
to tell if interaction drives the structure of the network.
to tell if interaction types drive the structure of the network.
\end{itemize}
\end{block}
\end{frame}