soutenance : vfinale

This commit is contained in:
Louis Lacoste 2024-08-29 09:30:29 +02:00
parent 6ff3a2a09a
commit dcaa51b50d
42 changed files with 354501 additions and 3937 deletions

31
.woodpecker.yaml Normal file
View file

@ -0,0 +1,31 @@
# Woodpecker CI pipeline: runs `make` in the rapport, presentation and
# soutenance directories, then runs a release step.
# The pipeline is restricted to the `main` branch.
when:
branch: main
steps:
# The three build steps below are identical except for the directory they enter.
# NOTE(review): the image tag `latest` is not pinned — confirm this is intended.
- name: build rapport
image: texlive/texlive:latest
commands:
- cd rapport
- make
- name: build presentation
image: texlive/texlive:latest
commands:
- cd presentation
- make
- name: build soutenance
image: texlive/texlive:latest
commands:
- cd soutenance
- make
# Release step based on the woodpeckerci/plugin-release image.
- name: publish
image: woodpeckerci/plugin-release
settings:
files:
# NOTE(review): no step above visibly produces "hello-world" files; these
# entries look like leftover template placeholders — confirm, and list the
# files actually produced by `make` instead.
# Could also be "hello-world*" to match both
- "hello-world"
- "hello-world.exe"
api_key:
# The API key is read from the secret named ACCESS_TOKEN.
from_secret: ACCESS_TOKEN

View file

@ -1,5 +1,78 @@
\section{Notes supplémentaires}
\printappxnotes
\section{VEM}
% Backup slide: derives that the variational bound \mathcal{J}(\tau;\theta)
% equals the log-likelihood minus the KL divergence between the variational
% distribution \Ryt and the conditional law of (Z, W) given Y.
% Relies on the macros \bY, \bZ, \bW, \Prob, \Ryt, \KL, \Esp and \Hshannon.
\begin{frame}
  \frametitle{Pourquoi VE minimise KL ?}
  % Step 1: split the complete log-likelihood, then take expectations under
  % \Ryt; \ell(\bY;\theta) does not involve (Z, W), so its expectation is itself.
  \begin{align*}
    \ell_c(\bY,\bZ,\bW;\theta) & = \log \Prob(\bZ, \bW|\bY;\theta) + \ell(\bY;\theta) \\
    \Leftrightarrow \ell(\bY;\theta) & = \ell_c(\bY,\bZ,\bW;\theta) - \log \Prob(\bZ, \bW|\bY;\theta) \\
    \Leftrightarrow \Esp_{\Ryt}[\ell(\bY;\theta)] & = \Esp_{\Ryt}[\ell_c(\bY,\bZ,\bW;\theta)] - \Esp_{\Ryt}[\log \Prob(\bZ,\bW|\bY;\theta)] \\
    \Leftrightarrow \ell(\bY;\theta) & = \Esp_{\Ryt}[\ell_c(\bY,\bZ,\bW;\theta)] - \Esp_{\Ryt}[\log \Prob(\bZ,\bW|\bY;\theta)]
  \end{align*}
  % Step 2: expand the KL divergence. Its second argument is the distribution
  % itself (not its logarithm), as on the "Variational Expectation" slide.
  \begin{align*}
    \text{Or }\KL{\Ryt}{\Prob(\bZ,\bW|\bY;\theta)} & = - \Esp_{\Ryt} [\log \frac{\Prob(\bZ,\bW|\bY;\theta)}{\Ryt}] \\
    = - \Esp_{\Ryt} [\log \Prob(\bZ,\bW|\bY;\theta)] + & \underbrace{\Esp_{\Ryt}[\log \Ryt]}_{-\Hshannon(\Ryt)} \\
    \Leftrightarrow \KL{\Ryt}{\Prob(\bZ,\bW|\bY;\theta)} + \Hshannon(\Ryt) & = - \Esp_{\Ryt} [\log \Prob(\bZ,\bW|\bY;\theta)]
  \end{align*}
  % Step 3: substitute step 2 into step 1; \qed is placed outside math mode.
  D'où $\ell(\bY;\theta) - \KL{\Ryt}{\Prob(\bZ,\bW|\bY;\theta)} = \mathcal{J}(\tau;\theta)$ \qed
\end{frame}
\section{Sélection de modèle}
\section{Résultats~\cite{baldockSystemsApproachReveals2019a,baldockDailyTemporalStructure2011}}
% Backup slides: for each of the four Baldock 2019 networks (Bristol,
% Edinburgh, Leeds, Reading), the adjacency matrix as given ("Donnée") next
% to the colbisbm-* version of the same matrix ("Réordonnée").
% All subfigures use the valid position specifier [t] (subfigure forwards it
% to a minipage, which does not understand float specifiers such as [ht]),
% every figure is centred, and both panels of a pair share the same width.
\begin{frame}[allowframebreaks]
  % --- Bristol ---
  \begin{figure}[ht]
    \centering
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol}
      \caption{Donnée}
    \end{subfigure}\hfil
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol}
      \caption{Réordonnée}
    \end{subfigure}
    \caption{Bristol}
  \end{figure}
  % --- Edinburgh ---
  \begin{figure}[ht]
    \centering
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh}
      \caption{Donnée}
    \end{subfigure}\hfil
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      % Same width as the left panel so both matrices are drawn at one scale.
      \includestandalone[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh}
      \caption{Réordonnée}
    \end{subfigure}
    \caption{Edinburgh}
  \end{figure}
  % --- Leeds ---
  \begin{figure}
    \centering
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds}
      \caption{Donnée}
    \end{subfigure}\hfil
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds}
      \caption{Réordonnée}
    \end{subfigure}
    \caption{Leeds}
  \end{figure}
  % --- Reading ---
  \begin{figure}
    \centering
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Reading}
      \caption{Donnée}
    \end{subfigure}\hfil
    \begin{subfigure}[t]{0.5\textwidth}
      \centering
      \includestandalone[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading}
      \caption{Réordonnée}
    \end{subfigure}
    \caption{Reading}
  \end{figure}
\end{frame}

View file

@ -2,28 +2,21 @@
\label{sec:contexte-du-modele}
\begin{frame}
\frametitle{Pourquoi un réseau ?}
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{block}{Contexte écologique}
\begin{itemize}
\small
\item Nombreux réseaux disponibles pour
interactions similaires.
\item Suivi biodiversité, robustesse et risque
d'effondrement \dots
\end{itemize}
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{figure}[ht]
\centering
\begin{tikzpicture}[scale=.45,rotate=270]
\input{../tikz/plantpollinatornetwork.tex}
\begin{tikzpicture}[scale=.6,rotate=270]
\input{tikz/plantpollinatornetwork.tex}
\end{tikzpicture}
\caption{Exemple d'un réseau plantes-pollinisateurs}
\caption{Exemple d'un réseau}
\label{fig:plantes-pollin}
\end{figure}
\end{column}
\begin{column}{0.4\textwidth}
\begin{column}{0.3\textwidth}
\centering
\begin{align*}
\begin{pmatrix}
@ -37,170 +30,250 @@
Matrice d'adjacence associée
\end{column}
\end{columns}
\end{block}
\begin{figure}[ht]
\centering
\includestandalone[width=0.7\textwidth]{tikz/applications/baldock/graph-Baldock2019_Bristol}
\caption{Réseau plante-pollinisateur de
Bristol\newline\cite{baldockSystemsApproachReveals2019a}}
\label{fig:label}
\end{figure}
\end{column}
\onslide<2>{
\begin{column}{0.45\textwidth}
\begin{block}{Contexte mathématique}
Pour un unique réseau~: variables latentes,
\emph{embedding}, \dots
Motivations pour proposer des méthodes adaptées aux collections
de réseaux~:
\begin{column}{0.5\textwidth}
\begin{itemize}
\item Espèces différentes, rôles analogues.
\item Transfert d'informations grands vers petits réseaux.
\item Regrouper les réseaux selon leur similarité (\emph{clustering}
de réseaux).
\item Modélisation d'interactions variées, ici d'écosystèmes
\item Structure nécessaire pour~: suivi biodiversité,
robustesse, risque d'effondrement
\item De plus en plus disponibles
\end{itemize}
\end{block}
\end{column}
}
\end{columns}
\end{frame}
\begin{frame}{Méthodes d'analyse pour un réseau}
Plusieurs méthodes~:
\begin{itemize}
\item Métriques~: degré, centralité, emboîtement \dots
\item Plongement des réseaux avec GNN
\item \textbf<2>{\emph{Clustering} des n\oe uds avec modèles à variables latentes}
\end{itemize}
\end{frame}
\begin{frame}
\addtocounter{footnote}{1}
\frametitle{Latent Block Model (LBM\footnotemark[\thefootnote])}
%DONE remplacer i \in bullet par Zi = \bullet
Proposé par~\cite{govaertEMAlgorithmBlock2005}.
\cite{govaertEMAlgorithmBlock2005}.
\begin{columns}
\begin{column}{0.40\linewidth}
\begin{figure}[H]
\center
\begin{tikzpicture}[scale=0.35]
\input{../tikz/lbm.tex}
\input{tikz/lbm.tex}
\end{tikzpicture}
\caption{Exemple de LBM\footnotemark[\thefootnote]}
\label{fig:LBMvisu}
\end{figure}
\end{column}
\only<1>{
\begin{column}{0.51\linewidth}
Pour \begin{itemize}
\begin{block}{Modèle hiérarchique}
\vspace{-\baselineskip}
\begin{align*}
\forall q\in[\![ 1, Q_1]\!],~ & \mathbb{P}(Z_i = q) = \pi_q \\
\forall r\in[\![ 1, Q_2]\!],~ & \mathbb{P}(W_j = r) = \rho_r \\
& Y_{ij} | Z_i, W_j \sim \mathcal{F}(\alpha_{Z_i,W_j})
\end{align*}
$|\pi| = Q_1, |\rho| = Q_2, |\alpha| = Q_1 \times Q_2$
\end{block}
\begin{block}{Formule concise LBM}
$Y \sim \mathcal{F}\text{-BiSBM}_{n_1,n_2}(Q_1, Q_2, \pi, \rho, \alpha)$
\end{block}
\end{column}}
\only<2>{
\begin{column}{0.51\linewidth}
Avec \begin{itemize}
\item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ blocs fixés en ligne
\item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{peach}\bullet}\}|$ blocs fixés en colonne
\end{itemize}
\begin{block}{Paramètres}
\begin{itemize}
\item $\pi_{\bullet} = \mathbb{P}(Z_i = \bullet)$ en ligne et $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ en colonne
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(X_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$
\item $\pi_{{\color{blueind}\bullet}} = \mathbb{P}(Z_i = {\color{blueind}\bullet})$
\item $\rho_{{\color{burntorange}\bullet}} = \mathbb{P}(W_j = {\color{burntorange}\bullet})$
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(Y_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$
\end{itemize}
\end{block}
\end{column}
\end{column}}
\end{columns}
\footnotetext[\thefootnote]{Que j'appellerai par la suite BiSBM}
\end{frame}
\section{Modèle de collection de réseaux bipartites}
\begin{frame}
\frametitle{Plusieurs réseaux}
\begin{figure}[ht]
\centering
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol}
\caption{Bristol}
\end{subfigure}
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh}
\caption{Edinburgh}
\end{subfigure}
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds}
\caption{Leeds}
\end{subfigure}
\caption{Matrices d'adjacence,~\cite{baldockSystemsApproachReveals2019a}}
\label{fig:adj}
\end{figure}
\end{frame}
\section[Modèles collection bipartites]{Modèles de collection de réseaux bipartites}
\label{sec:extension-de-colsbm-aux-reseaux-bipartites}
\begin{frame}
\frametitle{Collections bipartites}
\begin{tikzpicture}[scale=0.33]
\input{../tikz/collbm-iid.tex}
\end{tikzpicture}
\begin{itemize}
\item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ blocs fixés en ligne
\item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{peach}\bullet}\}|$ blocs fixés en colonne
\end{itemize}
\begin{block}{Paramètres}
\begin{itemize}
\item $\pi_{\bullet} = \mathbb{P}(Z_i =\bullet)$ en ligne et $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ en colonne
\item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(X_{ij} = 1 | Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$
\end{itemize}
\end{block}
\[
\forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim} \mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1^m, Q_2^m, \pi^m, \rho^m, \alpha^m)
\]
\onslide<2>{
\begin{figure}[ht]
\centering
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Bristol}
\caption{Bristol}
\end{subfigure}
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Edinburgh}
\caption{Edinburgh}
\end{subfigure}
\begin{subfigure}[ht]{0.3\textwidth}
\includestandalone[width=1.1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2019_Leeds}
\caption{Leeds}
\end{subfigure}
\caption{Matrices d'adjacence réordonnées, grâce au LBM}
\label{fig:adj-reord}
\end{figure}
}
\end{frame}
\begin{frame}
\frametitle{Différents modèles}
\only<1>{
\begin{tikzpicture}[scale=0.33]
\input{../tikz/collbm-iid.tex}
\end{tikzpicture}
\begin{block}{\emph{iid-colBiSBM}}
$\bm{\pi} = (\pi_1, \dots \pi_{Q_1})$ et $\bm{\rho} = (\rho_1, \dots \rho_{Q_2})$
\onslide<1->{ \begin{block}{\emph{iid}-colBiSBM}
\[
\forall m \in \{1\dots M\}, Y^m \overset{iid}{\sim}
\mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi, \rho, \alpha)
\]
avec $\theta = (\pi, \rho, \alpha)$.
\end{block}}
\onslide<2>{ \begin{block}{$\pi\rho$-colBiSBM}
\[
\forall m \in \{1\dots M\}, Y^m \overset{ind}{\sim}
\mathcal{F}\text{-BiSBM}_{n_1^m,n_2^m}(Q_1, Q_2, \pi^m, \rho^m, \alpha)
\]
avec $\theta = ((\pi^m)_{m=1,\dots, M}, (\rho^m)_{m=1,\dots, M}, \alpha)$.
\end{block}
}
\only<2>{
\begin{tikzpicture}[scale=0.33]
\input{../tikz/collbm-pirho.tex}
\end{tikzpicture}
\begin{block}{\emph{$\pi\rho$-colBiSBM}}
$\bm{\pi} = ((\pi_{\color{black}1}^{\color{red}m}, \dots \pi_{\color{black}Q_1}^{\color{red}m}))_{m=1,\dots M}$ et $\bm{\rho} = ((\rho_{\color{black}1}^{\color{red}m}, \dots \rho_{\color{black}Q_2}^{\color{red}m}))_{m=1,\dots M}$ %{$\forall q \in \llbracket 1, Q_1 - 1\rrbracket, \pi_q > 0$ et $\forall r \in \llbracket 1, Q_2 - 1\rrbracket, \rho_r > 0$}
\small \\
avec $\forall q,m \in \llbracket 1, Q_1 \rrbracket \times \llbracket 1, M \rrbracket, \pi_q^m \in \left[ 0,1 \right]$
et $\forall r,m \in \llbracket 1, Q_2 \rrbracket \times \llbracket 1, M \rrbracket, \rho_r^m \in \left[ 0,1 \right]$
\end{block}
}
Dans tous les modèles la structure de connectivité ($\bm{\alpha}$) est supposée identique au sein de la collection.
\end{frame}
\begin{frame}
\frametitle{Estimation des paramètres}
% DONE dire que tau i q m c' est la proba que Zim = q, approximation de la proba variationnelle. Parce qu on impose lindependance
% Par maximisation d'une borne inférieure variationnelle de la
% log-vraisemblance des données observées.
En adaptant \cite{chabert-liddellLearningCommonStructures2024a} qui se base
sur la méthode proposée par \cite{daudinMixtureModelRandom2008} utilisant
l'algorithme \emph{Variational EM}.
Maximisation de la log-vraisemblance ?
\begin{block}{log-vraisemblance et log-vraisemblance complète}
\[
\ell(\bm{Y};\theta) = \log \sum_{\bm{Z,W}\in \bm{\mathcal{Z}\times\mathcal{W}}} \exp\left( \ell_c(\bm{Y}, \bm{Z}, \bm{W};\theta) \right)
\]
avec $\bm{\mathcal{Z}} = \{1,\dots,\alert<2>{Q_1}\}^{\alert<2>{n}}, \bm{\mathcal{W}} = \{1,\dots,\alert<2>{Q_2}\}^{\alert<2>{n}}$
\end{block}
\uncover<3>{Donc, algorithme classique $\Rightarrow$
\emph{Expectation-Maximization} (EM).}
\end{frame}
\begin{frame}
\frametitle{Par EM classique}
A l'itération $(t)$ :
\begin{itemize}
\item[$\bullet$]\textbf{Étape E}: calculer
\[ \mathcal{Q}(\theta | \theta^{(t-1)}) = \mathbb E_{\alert<2>{\bm Z, \bm W | \bm Y, \theta^{(t-1)}} } \left[\ell_c(\bm Y, \bm Z, \bm W; \theta) \right] \]
\item[$\bullet$]\textbf{Étape M}:
\[ \theta^{(t)} = \arg \max_{\theta} \mathcal{Q}(\theta | \theta^{(t-1)}) \]
\end{itemize}
\uncover<2>{
\begin{alertblock}{Problème pour l'EM classique}
Loi de $\bm{Z,W|Y},\theta^{(t-1)}$ inaccessible
\end{alertblock}}
\end{frame}
\begin{frame}
Par \emph{Variational EM}, comme proposé
par~\cite{daudinMixtureModelRandom2008,
chabert-liddellLearningCommonStructures2024a}.
\begin{block}{Approximation variationnelle de $\bm{Z,W|Y},\theta^{(t-1)}$}
$\mathcal{R}_{Y^m,\tau}(\mathbf{Z}^m, \mathbf{W}^m) =
\mathcal{R}^1_{Y^m,\tau}(\mathbf{Z}^m)
{\color{red}\times}
\mathcal{R}^2_{Y^m,\tau}(\mathbf{W}^m) \Rightarrow$ indépendance lignes, colonnes.
\end{block}
\begin{multline*}
\ell (\bm{X};\bm{\theta}) \geq \color{red}\sum_{m=1}^{M} \bigg(
\color{black} Q^m(\boldsymbol\theta\mid\boldsymbol\theta^{(t)}) +
\mathcal{H}(\mathcal{R}_{\mathbf{X}^m,\boldsymbol\theta^{(t)}}
\ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg(
\color{black} \mathcal{Q}^m(\theta\mid\theta^{(t)}) +
\mathcal{H}(\mathcal{R}_{Y^m,\tau}
(\mathbf{Z}^m, \mathbf{W}^m))
\color{red}\bigg) \color{black}
=: J(\bm{\tau};\bm{\theta})
\eqcolon \mathcal{J}(\tau;\theta)
\end{multline*}
$Q^m(\boldsymbol\theta\mid\boldsymbol\theta^{(t)}) =
\operatorname{E}_{\mathbf{Z}^m,\mathbf{W}^m
\sim \mathcal{R}_{\mathbf{X}^m,\boldsymbol\theta^{(t)}}(.)}
\left[ \log p (\mathbf{X}^m,\mathbf{Z}^m,\mathbf{W}^m | \boldsymbol\theta) \right] \,$
\begin{block}{Approximation variationnelle}
$\mathcal{R}_{\mathbf{X}^m,\boldsymbol\theta^{(t)}}(\mathbf{Z}^m, \mathbf{W}^m) =
P(\mathbf{Z}^m | \mathbf{X}^m,\boldsymbol\theta^{(t)}) P(\mathbf{W}^m | \mathbf{X}^m,\boldsymbol\theta^{(t)})$, c'est à dire avoir
une indépendance lignes, colonnes.
\end{block}
$\mathcal{Q}^m(\theta\mid\theta^{(t)}) =
\mathbb{E}_{\mathbf{Z}^m,\mathbf{W}^m
\sim \mathcal{R}_{Y^m,\tau}(.)}
\left[ \ell_c(Y^m,\mathbf{Z}^m,\mathbf{W}^m ; \theta) \right] \,$
\end{frame}
\begin{frame}{Formule développée de l'EM variationnel}
\begin{multline*}
\ell (\bm{X};\bm{\theta}) \geq \color{red}\sum_{m=1}^{M} \bigg( \color{black} \sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
\ell (\bm{Y};\theta) \geq \color{red}\sum_{m=1}^{M} \bigg( \color{black} \sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(Y^{m}_{ij}; \alpha_{qr}) \\
+ \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m} \\
- \sum_{i=1}^{n_1} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r} \color{red}\bigg) \color{black} =: J(\bm{\tau};\bm{\theta}),
- \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r} \color{red}\bigg) \color{black} \eqcolon
\mathcal{J}(\tau;\theta),
\end{multline*}
$\ell$ désigne la $\log$ vraisemblance.
\begin{block}{Approximation variationnelle}
$\tau_{iq}^{1,m} = P_{\mathcal{R}_m}(Z_{iq}^m = 1|X_{i\bullet}^m)$
et $\tau_{jr}^{2,m} = P_{\mathcal{R}_m}(W_{jr}^m = 1|X_{\bullet j}^m)$
$\tau_{iq}^{1,m} = \mathcal{R}^1_{Y^m,\tau}(Z_{iq}^m = 1)$
et $\tau_{jr}^{2,m} = \mathcal{R}^2_{Y^m,\tau}(W_{jr}^m = 1)$
\end{block}
\end{frame}
\begin{frame}{Étape \emph{Variational Expectation}}
$$\widehat{\bm{\tau}}^{(t+1)} = \arg \max_{\bm{\tau}}
\mathcal{J}(\mathcal{\bm{\tau}},\bm{\widehat{\theta}}^{(t)})$$
\[
\widehat{\tau}^{(t+1)} = \arg \max_{\tau}
\mathcal{J}(\tau;\widehat{\theta}^{(t)})
= \arg\min_{\tau\in\mathcal{T}} \mathbf{KL}[\mathcal{R}_{\mathbf{Y},\tau}, \mathbb{P}(.|\mathbf{Y})]
\]
\begin{equation*}
\begin{cases}
\widehat{\tau}_{iq}^{1,m} \propto \widehat{\pi}_{q}^{m(t)} \prod_{j=1}^{n_2^m}\prod_{r\in\mathcal{Q}_2^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{jr}^{2,m(t+1)}} & \forall i = 1, \dots , n_1^m, q \in \mathcal{Q}_1^m \\
\widehat{\tau}_{jr}^{2,m} \propto \widehat{\rho}_{r}^{m(t)} \prod_{i=1}^{n_1^m}\prod_{q\in\mathcal{Q}_1^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{iq}^{1,m(t+1)}} & \forall j = 1, \dots , n_2^m, r \in \mathcal{Q}_2^m
\widehat{\tau}_{iq}^{1,m} \propto \widehat{\pi}_{q}^{m(t)} \prod_{j=1}^{n_2^m}\prod_{r\in\mathcal{Q}_2^m} f(Y_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{jr}^{2,m(t+1)}} & \forall i = 1, \dots , n_1^m, q \in \mathcal{Q}_1^m \\
\widehat{\tau}_{jr}^{2,m} \propto \widehat{\rho}_{r}^{m(t)} \prod_{i=1}^{n_1^m}\prod_{q\in\mathcal{Q}_1^m} f(Y_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{iq}^{1,m(t+1)}} & \forall j = 1, \dots , n_2^m, r \in \mathcal{Q}_2^m
\end{cases}
\end{equation*}
\footnotetext[2]{Initialisation des $\widehat{\bm{\tau}}$ avec un
\footnotetext[2]{Initialisation des $\widehat{\tau}$ avec un
\emph{spectral clustering} sur les réseaux.}
\end{frame}
\begin{frame}{Étape \emph{Maximization}}
\[
\widehat{\bm{\theta}}^{(t+1)} = \arg \max_{\bm{\theta}} \mathcal{J}(\mathcal{\bm{\widehat{\tau}}}^{(t+1)},\bm{\theta})
\widehat{\theta}^{(t+1)} = \arg \max_{\theta} \mathcal{J}(\widehat{\tau}^{(t+1)};\theta)
\]
\begin{block}{Paramètres de connectivité}
\begin{align*}
\widehat{\alpha}_{qr} = \frac{\sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m} \tau_{jr}^{2,m} X_{ij}^m}{\sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m} \tau_{jr}^{2,m}}
\widehat{\alpha}_{qr} = \frac{\sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m} \tau_{jr}^{2,m} \alert<2>{Y_{ij}^m}}{\sum_{m=1}^{M} \sum_{i=1}^{n_1^m} \sum_{j=1}^{n_2^m} \tau_{iq}^{1,m} \tau_{jr}^{2,m}}
\end{align*}
\end{block}
\only<1>{
@ -224,19 +297,20 @@
\section{Sélection de modèle}
\begin{frame}
\frametitle{Problème de choix de $(Q_1, Q_2)$}
\underline{L'estimation de paramètres se fait à $Q_1, Q_2$ blocs fixés}, il faut donc déterminer les \enquote*{meilleures} coordonnées.
Nous maximisons un critère, le \emph{Bayesian Information Criterion - Like}
(BIC-L), de vraisemblance pénalisée en adaptant les formules
de~\cite{chabert-liddellLearningCommonStructures2024a}.
\frametitle{Problème choix de $(Q_1, Q_2)$}
Besoin sélectionner $Q_1$ et $Q_2$. Critère BIC-Like\footnote{ICL + Entropie + pénalité}
\begin{align*}
\text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \max_{\theta} \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\theta)] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(\theta, Q_1, Q_2) \\
& = \max_{\theta} \mathcal{J}(\hat{\tau};\theta) - \frac{1}{2}\text{pen}(\theta, Q_1, Q_2)
\end{align*}
\begin{alertblock}{Problèmes de l'exploration}
\begin{itemize}
\item Exploration de l'espace $\mathbb{N}^2$ coûteux, besoin d'une
stratégie.
\item Sensibilité aux initialisations et à l'aléatoire.
\item Exploration de $\mathbb{N}^2$ coûteuse.
\item Sensibilité initialisations.
\end{itemize}
\end{alertblock}
\end{frame}
@ -245,7 +319,7 @@
\begin{columns}
\begin{column}{0.5\linewidth}
\begin{tikzpicture}
\input{../tikz/greedy-exploration.tex}
\input{tikz/greedy-exploration.tex}
\end{tikzpicture}
\end{column}
\begin{column}{0.35\linewidth}
@ -279,7 +353,7 @@
\begin{columns}
\begin{column}{0.6\textwidth}
\begin{figure}
\input{../tikz/moving-window.tex}
\input{tikz/moving-window}
\caption{Fenêtre glissante}
\end{figure}
\end{column}
@ -301,11 +375,72 @@
\label{sec:application}
\begin{frame}
\frametitle{Clustering de réseaux}
\frametitle{Résultats~\cite{baldockSystemsApproachReveals2019a}}
\only<1>{
\begin{figure}[ht]
\centering
\begin{tikzpicture}
\input{../tikz/clustering.tex}
\end{tikzpicture}
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includestandalone[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Bristol}
\caption{Bristol}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includestandalone[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2019_Edinburgh}
\caption{Edinburgh}
\end{subfigure}
\newline
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includestandalone[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Leeds}
\caption{Leeds}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includestandalone[width=0.5\textwidth]{tikz/applications/baldock/mat-Baldock2019_Reading}
\caption{Reading}
\end{subfigure}
\caption{Matrices d'adjacence,~\cite{baldockSystemsApproachReveals2019a}}
\end{figure}
}
\only<2>{
\begin{figure}[ht]
\centering
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includestandalone[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Bristol}
\caption{Bristol}
\end{subfigure}\hfil
\begin{subfigure}[t]{0.5\textwidth}
\centering
\includestandalone[width=0.45\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Edinburgh}
\caption{Edinburgh}
\end{subfigure}
\newline
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includestandalone[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Leeds}
\caption{Leeds}
\end{subfigure}\hfil
\begin{subfigure}[ht]{0.5\textwidth}
\centering
\includestandalone[width=0.5\textwidth]{tikz/applications/baldock/colbisbm-mat-Baldock2019_Reading}
\caption{Reading}
\end{subfigure}
\caption{Matrices d'adjacence réordonnées par \emph{iid}-colBiSBM,~\cite{baldockSystemsApproachReveals2019a}}
\end{figure}
}
\end{frame}
\begin{frame}
\frametitle{Clustering de réseaux}
\begin{figure}[ht]
\includestandalone[width=0.45\textwidth]{tikz/applications/baldock/mat-Baldock2011_TB+Baldock2011_JN}
\caption{Matrice d'adjacence,~\cite{baldockDailyTemporalStructure2011}}
\end{figure}
\end{frame}
\begin{frame}[allowframebreaks]
@ -316,15 +451,13 @@
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[scale=0.2,angle=-90]{backup-app-iid.png}
\caption{Modèle $iid$,\\
séparent réseau africain et réseaux anglais}
\caption{Modèle $iid$}
\end{subfigure}%
~
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[scale=0.2,angle=-90]{backup-app-pirho.png}
\caption{Modèle $\pi\rho$,\\
fusionnent réseaux africain et anglais}
\caption{Modèle $\pi\rho$}
\end{subfigure}%
\caption{Partitionnement des réseaux
de~\cite{baldockDailyTemporalStructure2011,
@ -353,6 +486,36 @@
\end{figure}
\end{frame}
\begin{frame}{Algorithme du clustering}
\centering
\vspace{0.25\baselineskip}
\begin{tikzpicture}[scale=0.85]
\input{tikz/clustering.tex}
\end{tikzpicture}
\[
D_{\mathcal{M}}(m,m') = \sum_{q = 1}^{Q_1} \sum_{r = 1}^{Q_2} \max(\widetilde{\pi}_{q}^{m}, \widetilde{\pi}_{q}^{m'}) \left( \widetilde{\alpha}_{qr}^{m} - \widetilde{\alpha}_{qr}^{m'}\right)^{2} \max(\widetilde{\rho}_{r}^{m}, \widetilde{\rho}_{r}^{m'})
\]
\end{frame}
\begin{frame}{Résultats}
\begin{figure}[ht]
\centering
\begin{subfigure}{0.5\textwidth}
\centering
\includestandalone[width=1\textwidth]{tikz/applications/baldock/bisbm-mat-Baldock2011_TB+Baldock2011_JN}
\caption{Réordonnée par LBM}
\end{subfigure}\hfil
\begin{subfigure}{0.5\textwidth}
\centering
\includestandalone[width=1\textwidth]{tikz/applications/baldock/pirho-colbisbm-mat-Baldock2011_TB+Baldock2011_JN}
\caption{Réordonnée par $\pi\rho$-colBiSBM}
\end{subfigure}
\caption{Matrice d'adjacence réordonnée par $\pi\rho$-colBiSBM,~\cite{baldockDailyTemporalStructure2011}}
\end{figure}
\end{frame}
\section{Conclusion}
\label{sec:conclusion}
\begin{frame}
@ -378,16 +541,15 @@
\begin{itemize}
\item Investiguer stabilité face à l'aléatoire et aux \emph{optima} locaux.
\item Preuve d'identifiabilité du modèle $\pi\rho$.
\item
\end{itemize}
\begin{block}{Package et applications}
\begin{itemize}
\item Intégration au package \texttt{colSBM} et publication CRAN
\item Intégration au package \texttt{colSBM}, amélioration interface utilisateur et ajout retours écologues
\item Publication CRAN
\item Intégrer possibilité d'un critère supplémentaire pour le
clustering
\item Appliquer clustering données de \cite{pichonTellingMutualisticAntagonistic2024,doreRelativeEffectsAnthropogenic2021}
\item
\end{itemize}
\end{block}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -50,6 +50,15 @@
%% Biblio
\input{../shared/biblio.tex}
% --- Notation shortcuts used on the variational-EM slides ---
% Bold symbols for the matrices Y, Z and W.
\newcommand*{\bY}{\bm{Y}}
\newcommand*{\bZ}{\bm{Z}}
\newcommand*{\bW}{\bm{W}}
% Probability and expectation operators (blackboard bold).
\newcommand*{\Prob}{\mathbb{P}}
\newcommand*{\Esp}{\mathbb{E}}
% Entropy symbol, used as \Hshannon(\Ryt).
\newcommand*{\Hshannon}{\mathcal{H}}
% Variational distribution R indexed by \bY and \tau.
\newcommand*{\Ryt}{\mathcal{R}_{\bY,\tau}}
% \KL{a}{b} typesets KL[a,b].
\newcommand*{\KL}[2]{\mathbf{KL}[#1,#2]}
% Footnote
\makeatletter
\newcommand\blfootnote[1]{%

1
soutenance/tikz Symbolic link
View file

@ -0,0 +1 @@
../tikz/

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:04
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:18
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:07
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:22
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:10
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:25
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:30:59
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:13
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:30:59
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:13
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:00
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:13
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:00
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:13
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

View file

@ -1,4 +1,4 @@
% Created by tikzDevice version 0.12.6 on 2024-08-28 14:31:00
% Created by tikzDevice version 0.12.6 on 2024-08-28 15:36:14
% !TEX encoding = UTF-8 Unicode
\documentclass[10pt]{standalone}\usepackage{tikz}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff