mia-rapport-2024/presentation/presentation.tex

\documentclass{beamer}
\usetheme{Boadilla}

% importations
\usepackage[french]{babel} % pour dire que le texte est en francais
\usepackage{csquotes}
\usepackage[T1]{fontenc} % pour les font postscript
\usepackage[cyr]{aeguill} % Police vectorielle TrueType, guillemets francais
\usepackage{epsfig} % pour gérer les images
\usepackage{amsmath,amsthm, stmaryrd} % très bon mode mathématique
\usepackage{amsfonts,amssymb,bm, bbold}% permet la definition des ensembles
\usepackage{algorithm2e} % pour les algorithmes
\usepackage{algpseudocode} % pour les algorithmes
\usepackage{graphicx}
\usepackage{float} % pour le placement des figure
\usepackage{url} % pour une gestion efficace des url
\usepackage{hyperref}  % pour les hyperliens dans le document
\usepackage{tikz} % For graph plots
\usepackage[outline]{contour}
\usepackage{adjustbox} % To resize tikzpictures
\usepackage{fontawesome5}
\usepackage{makecell}

% Beamer appearance
% Figure captions are typeset in \scriptsize.
\setbeamerfont{caption}{size=\scriptsize}
% No navigation symbols on the slides.
\setbeamertemplate{navigation symbols}{}
% Headline: navigation bar over the full paper width, drawn with the
% "section in head/foot" beamer colour.
\setbeamertemplate{headline}{%
    \begin{beamercolorbox}[ht=2.25ex,dp=3.75ex]{section in head/foot}
        \insertnavigation{\paperwidth}
    \end{beamercolorbox}%
}%

% Bibliography: biblatex with the APA style; sorting=none is biblatex's
% "no sorting" scheme (entries appear in citation order).
\usepackage[style=apa,sorting=none]{biblatex}
\addbibresource{../references.bib}

% Images: directories searched by \includegraphics
\graphicspath{{../img/}{../figure/}}

% TikZ libraries
% Each library is requested once, in the order in which it was first listed.
\usetikzlibrary{%
    calc,shapes,backgrounds,arrows,automata,shadows,positioning,trees,%
    decorations.pathreplacing,calligraphy,external,petri%
}

%% TikZ styles
% "basic" is the common node shape; "root", "level 2" and "level 3" all
% build on it and only change corners, fill colour, alignment and width.
\tikzset{
    basic/.style = {
        draw,
        text width=3cm,
        font=\sffamily,
        rectangle
    },
    root/.style = {
        basic,
        rounded corners=2pt,
        thin,
        align=center,
        fill=green!30
    },
    level 2/.style = {
        basic,
        rounded corners=6pt,
        thin,
        align=center,
        fill=green!60,
        text width=8em
    },
    level 3/.style = {
        basic,
        thin,
        align=left,
        fill=pink!60,
        text width=3.5cm
    }
}

% Colours
% For the multilevel TikZ figures
\definecolor{ao(english)}{rgb}{0.0, 0.5, 0.0}
\definecolor{redorg}{RGB}{215, 48, 39}
\definecolor{orangeorg}{RGB}{253, 174, 97}
\definecolor{greenind}{RGB}{112, 130, 56}

% Shades used for the row-block bullets in the slides
\definecolor{blueind}{RGB}{16, 101, 171}
\definecolor{cyanind}{RGB}{58, 147, 195}
\definecolor{electricblue}{RGB}{142, 196, 222}

% Shades used for the column-block bullets in the slides
\definecolor{burntorange}{RGB}{179, 21, 41}
\definecolor{goldenyellow}{RGB}{215, 95, 76}
\definecolor{peach}{RGB}{246, 164, 130}

% Mid grey (replaces any previous definition of "gray")
\definecolor{gray}{RGB}{128, 128, 128}

% Footnote
\makeatletter
% \blfootnote{<text>}: footnote without a marker.
% The footnote counter is decremented afterwards, so the numbering of the
% regular footnotes is not affected. All redefinitions are local to the group.
\newcommand\blfootnote[1]{%
  \begingroup
  % ##1 is the argument of \@makefntext itself; a bare #1 here would be
  % replaced by the argument of \blfootnote when the macro is expanded.
  % The empty, braced \makebox only reserves the 1.8em indent (without the
  % braces, \makebox would take the first token of the text as its content).
  \renewcommand{\@makefntext}[1]{\noindent\makebox[1.8em][r]{}##1}%
  \renewcommand\thefootnote{}\footnote{#1}%
  \addtocounter{footnote}{-1}%
  \endgroup
}
\makeatother


% Talk metadata; the bracketed arguments are the short forms of title and author.
\title[Collections de réseaux bipartites]{Détection de structure dans des réseaux bipartites}
\subtitle{Séminaire des stagiaires}
\author[L. Lacoste]{Louis \textsc{Lacoste}} % Under the supervision of Pierre
\date{4 juillet 2024}

\begin{document}

% Title slide: plain layout, excluded from the frame numbering
\begin{frame}[plain,noframenumbering]
    \titlepage
\end{frame}

\section{Contexte du modèle}
\label{sec:contexte-du-modele}
% Frame: motivation for analysing a collection of networks jointly
\begin{frame}{Contexte écologique}
    \begin{itemize}
        \item Faire de la détection de structure sur un réseau (SBM, LBM)
              mais intérêt à le faire sur plusieurs
        \item De nombreux réseaux disponibles \parencite{WebLifeEcological}
              et décrivant des interactions similaires
        \item Re-grouper les réseaux selon leur similarité
              (\emph{clustering} de réseaux)
        \item Transférer de l'information grâce à la collection
              (par exemple reconstitution de données manquantes)
        \item Déterminer des structures d'interactions fines
              de manière agnostique % No preconceived idea
              %\item Vérifier si le regroupement est lié à des co-variables
    \end{itemize}
\end{frame}
% Frame: what a bipartite network is.
% Left column: TikZ drawing of a plant-pollinator network;
% right column: the associated 4x6 binary adjacency matrix.
\begin{frame}
    \frametitle{Réseaux bipartites\footnote{Ou \emph{bipartis}. Voir~\cite{larousseDefinitionsBipartiBipartite}.}}
    \begin{columns}[c]
        % Left column: the figure body is read from an external TikZ file, drawn at scale 0.65
        \begin{column}{0.65\textwidth}
            \begin{figure}[ht]
                \centering
                \begin{tikzpicture}[scale=.65]
                    \input{../tikz/plantpollinatornetwork.tex}
                \end{tikzpicture}
                \caption{Exemple d'un réseau plantes-pollinisateurs}
                \label{fig:plantes-pollin}

            \end{figure}
        \end{column}
        \hfill
        % Right column: adjacency matrix X with its description set in \footnotesize
        \begin{column}{0.35\linewidth}
            \centering
            \begin{align*}
                X = \begin{pmatrix}
                        1 & 1 & 1 & 1 & 0 & 0 \\
                        0 & 0 & 0 & 1 & 0 & 1 \\
                        1 & 0 & 0 & 0 & 1 & 0 \\
                        0 & 0 & 0 & 0 & 1 & 0
                    \end{pmatrix}
            \end{align*}
            \footnotesize
            Matrice d'adjacence associée
        \end{column}
    \end{columns}
    \smallskip
    % Closing remark below the columns, with examples of bipartite interactions
    Permet de décrire des interactions impliquant deux agents dont les rôles
    sont de natures différentes.\\
    Par exemple : hôtes-parasites, plantes-pollinisateurs, graines-disperseurs \dots
\end{frame}

% Frame: the Latent Block Model. Left column: illustrative figure;
% right column: numbers of blocks and model parameters.
\begin{frame}
    % NOTE(review): the footnote number 2 is hard-coded here; it has to match the
    % number produced by \footnotemark in the caption below — confirm after any
    % change to the footnotes of the preceding frames.
    \frametitle{Latent Block Model (LBM\footnotemark[2])}
    % DONE: replace "i \in bullet" with "Z_i = \bullet"
    Proposé par~\cite{govaertEMAlgorithmBlock2005}.
    \begin{columns}
        \begin{column}{0.40\linewidth}
            \begin{figure}[H]
                % \centering (declaration) instead of \center, which opens a
                % center environment that is never closed.
                \centering
                \begin{tikzpicture}[scale=0.35]
                    \input{../tikz/lbm.tex}
                \end{tikzpicture}
                \caption{Exemple de LBM\footnotemark}
                \label{fig:LBMvisu}
            \end{figure}
        \end{column}
        \begin{column}{0.51\linewidth}
            Pour \begin{itemize}
                % Q_1 / Q_2 are written as the cardinality of the set of row / column block colours
                \item $Q_1 = |\{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}|$ blocs fixés en ligne
                \item $Q_2 = |\{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{peach}\bullet}\}|$ blocs fixés en colonne
            \end{itemize}
            \begin{block}{Paramètres}
                \begin{itemize}
                    \item $\pi_{\bullet} = \mathbb{P}(Z_i = \bullet)$ en ligne et $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ en colonne
                    % \mid gives the relation spacing expected for a conditioning bar
                    \item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}} = \mathbb{P}(X_{ij} = 1 \mid Z_i = {\color{blueind}\bullet}, W_j = {\color{burntorange}\bullet})$
                \end{itemize}
            \end{block}
        \end{column}
    \end{columns}

    % Text of the footnote whose marks are set in the frame title and in the caption
    \footnotetext{Que j'appellerai par la suite BiSBM}

\end{frame}

\section{Extension de \emph{colSBM} aux réseaux bipartites}
\label{sec:extension-de-colsbm-aux-reseaux-bipartites}
% Frame: a collection of bipartite networks, with block counts and parameters
\begin{frame}{Collections bipartites}
    \begin{tikzpicture}[scale=0.33]
        \input{../tikz/collbm-iid.tex}
    \end{tikzpicture}

    \begin{itemize}
        \item $Q_1 = |\{
                  {\color{blueind}\bullet},
                  {\color{cyanind}\bullet},
                  {\color{electricblue}\bullet}
              \}|$
              blocs fixés en ligne
        \item $Q_2 = |\{
                  {\color{burntorange}\bullet},
                  {\color{goldenyellow}\bullet},
                  {\color{peach}\bullet}
              \}|$
              blocs fixés en colonne
    \end{itemize}
    \begin{block}{Paramètres}
        \begin{itemize}
            \item $\pi_{\bullet} = \mathbb{P}(Z_i = \bullet)$ en ligne
                  et $\rho_{\bullet} = \mathbb{P}(W_j = \bullet)$ en colonne
            \item $\alpha_{{\color{blueind}\bullet}{\color{burntorange}\bullet}}
                      = \mathbb{P}(X_{ij} = 1 | Z_i = {\color{blueind}\bullet},
                                   W_j = {\color{burntorange}\bullet})$
        \end{itemize}
    \end{block}
\end{frame}

% Frame: the model variants, one per overlay.
% Slide 1: iid variant; slide 2: pi-rho variant, where the block proportions
% carry a network index m (highlighted in red).
\begin{frame}
    \frametitle{Différents modèles}
    \only<1>{
        \begin{tikzpicture}[scale=0.33]
            \input{../tikz/collbm-iid.tex}
        \end{tikzpicture}
        \begin{block}{\emph{iid-colBiSBM}}
            % A comma follows \dots so the list reads "x_1, ..., x_Q"
            $\bm{\pi} = (\pi_1, \dots, \pi_{Q_1})$ et $\bm{\rho} = (\rho_1, \dots, \rho_{Q_2})$
        \end{block}
    }
    \only<2>{
        \begin{tikzpicture}[scale=0.33]
            \input{../tikz/collbm-pirho.tex}
        \end{tikzpicture}
        \begin{block}{\emph{$\pi\rho$-colBiSBM}}
            $\bm{\pi} = ((\pi_{\color{black}1}^{\color{red}m}, \dots, \pi_{\color{black}Q_1}^{\color{red}m}))_{m=1,\dots, M}$ et
            $\bm{\rho} = ((\rho_{\color{black}1}^{\color{red}m}, \dots, \rho_{\color{black}Q_2}^{\color{red}m}))_{m=1,\dots, M}$
            % Disabled alternative constraint, kept for reference:
            %{$\forall q \in \llbracket 1, Q_1 - 1\rrbracket, \pi_q > 0$ et $\forall r \in \llbracket 1, Q_2 - 1\rrbracket, \rho_r > 0$}
            \small \\
            % The quantifier ranges over pairs, hence the parentheses around (q,m) and (r,m)
            avec $\forall (q,m) \in \llbracket 1, Q_1 \rrbracket \times \llbracket 1, M \rrbracket, \pi_q^m \in \left[ 0,1 \right]$
            et $\forall (r,m) \in \llbracket 1, Q_2 \rrbracket \times \llbracket 1, M \rrbracket, \rho_r^m \in \left[ 0,1 \right]$
        \end{block}
    }
    Dans tous les modèles la structure de connectivité ($\bm{\alpha}$) est supposée identique au sein de la collection.
\end{frame}
% Frame: parameter estimation by maximising a variational lower bound J.
\begin{frame}
    \frametitle{Estimation des paramètres}
    % DONE: say that tau_{i,q}^m is the probability that Z_i^m = q, an approximation
    % of the variational probability, because independence is imposed
    Maximisation d'une borne inférieure de la log-vraisemblance des données observées.
    % The bound is a sum over the M networks (red sum and red brackets).
    % Line 1: edge term; line 2: block-membership terms; line 3: entropy terms.
    % The entropy terms sum over nodes AND blocks, with the same per-network
    % bounds n_1^m, n_2^m as the lines above.
    \begin{multline*}
        \ell (\bm{X};\bm{\theta}) \geq \color{red}\sum_{m=1}^{M} \bigg( \color{black} \sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
        + \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m} \\
        - \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r} \color{red}\bigg) \color{black} =: J(\bm{\tau};\bm{\theta})
    \end{multline*}

    % Definition of the variational parameters tau and the factorisation that is imposed
    \begin{block}{Approximation variationnelle}
        $\tau_{i,q}^{1,m} = \mathbb{P}(Z_i = q \mid X^m_{ij})$ et $\tau_{j,r}^{2,m} = \mathbb{P}(W_j = r \mid X^m_{ij})$ tels que $\mathbb{P}(Z_i = q, W_j = r \mid X^m_{ij}) = \tau_{i,q}^{1,m}\times\tau_{j,r}^{2,m}$
    \end{block}

\end{frame}

% Frame: model selection, part 1 — greedy search over (Q_1, Q_2).
% Left column: exploration drawing; right column: the three steps of the search.
\begin{frame}
    \frametitle{Sélection de modèle : choix de $(Q_1,Q_2)$ - Approche gloutonne}
    % DONE: goal = maximise a criterion (the BIC-L); move the "see St Clair" reference into the footnote
    % VEM is run with Q1, Q2 fixed
    % Q1, Q2 are chosen by maximising the BIC-L
    % Itemize inside the box: initialisation, neighbour exploration, stopping rule
    \underline{Le VEM se fait à $Q_1, Q_2$ fixés}, il faut donc déterminer les \enquote*{meilleures} coordonnées.
    Nous maximisons un BIC-L\footnote{\emph{Bayesian Information Criterion - Like}, en adaptant les formules de~\cite{chabert-liddellLearningCommonStructures2023}}.

    Détermination d'un premier mode par approche \emph{gloutonne} \smallskip
    \begin{columns}
        % Left column: figure body read from an external TikZ file
        \begin{column}{0.5\linewidth}
            \begin{tikzpicture}
                \input{../tikz/greedy-exploration.tex}
            \end{tikzpicture}
        \end{column}
        % Right column: summary of the greedy exploration
        \begin{column}{0.5\linewidth}
            \begin{block}{Exploration gloutonne}
                \begin{itemize}
                    \item Initialisation sur $(1,2)$ et $(2,1)$
                    \item Exploration des 4 voisins et déplacement sur le meilleur des 4
                    \item Arrêt après 2 étapes successives sans augmentation du BIC-L
                \end{itemize}
            \end{block}
        \end{column}
    \end{columns}
\end{frame}
% Frame: model selection, part 2 — moving window around the current mode.
% Left column: figure read from an external TikZ file;
% right column: short captions shown on specific overlays only.
\begin{frame}
    \frametitle{Sélection de modèle : choix de $(Q_1,Q_2)$ - Fenêtre glissante}
    \begin{columns}
        \begin{column}{0.6\textwidth}
            \begin{figure}
                \input{../tikz/moving-window.tex}
                \caption{Fenêtre glissante}
            \end{figure}
        \end{column}
        \begin{column}{0.4\textwidth}
            % NOTE(review): the slide numbers 3, 9 and 10 presumably match overlay
            % specifications inside ../tikz/moving-window.tex — confirm when that file changes.
            \only<3>{\begin{block}{}
                    Initialisation du modèle si nécessaire
                \end{block}}
            \only<9>{\begin{block}{}
                    Localisation du nouveau mode
                \end{block}}
            \only<10>{\begin{block}{}
                    Déplacement sur le nouveau mode puis itération
                \end{block}}
        \end{column}
    \end{columns}


\end{frame}

% Frame: flowchart of the recursive network-clustering procedure.
\begin{frame}
    \frametitle{Clustering de réseaux}
    \begin{tikzpicture}
        % Styles local to this picture (\tikzset replaces the deprecated \tikzstyle):
        %   instruct   - processing step          first_col  - grey box standing for the whole collection
        %   second_col - red circle standing for a sub-collection
        %   test       - decision diamond         es         - input/output box
        %   suite      - arrows between the steps
        \tikzset{
            instruct/.style={font=\small, text justified, rectangle,draw,fill=yellow!50},
            first_col/.style={rectangle, text justified, draw,fill=gray!50},
            second_col/.style={scale=0.55, circle, draw,fill=red!50},
            test/.style={font=\small, text justified, diamond, aspect=2.5,thick,
                    draw=blue,fill=yellow!50,text=blue},
            es/.style={font=\small, text justified, rectangle,draw,rounded corners=4pt,fill=cyanind!25},
            suite/.style={->,>=stealth,thick,rounded corners=4pt}
        }

        % Nodes, from top to bottom
        \node[es] (liste) at (0,4) {Donner une collection à partitionner};
        \node[instruct, text width=5cm, below = 0.45cm of liste] (1-collection) {Ajuster \emph{colBiSBM}};
        \node[first_col, right = 0.5cm of 1-collection] (1-col-obj) {};
        \node[instruct, text width=5cm, below = 0.45cm of 1-collection] (dissimi) {Calculer une matrice de dissimilarité de la collection};
        \node[instruct, text width=5cm, below = 0.45cm of dissimi] (2-sous-collection) {Séparer la \emph{collection en 2 sous-collections} et ajuster les \emph{colBiSBM}};
        % The two sub-collection markers get distinct names (both were named 1-sec-col-obj)
        \node[second_col, right = 0.25cm of 2-sous-collection] (1-sec-col-obj) {1};
        \node[second_col, right = 0.25cm of 1-sec-col-obj] (2-sec-col-obj) {2};
        \node[test,below = 0.45cm of 2-sous-collection, scale=0.5] (BICL-test) {$\sum_{i=1}^{2} (\text{BIC-L}(\tikz[baseline=-0.25cm]{\node[second_col] {i};} )) > \text{BIC-L}(\tikz[baseline=-0.25cm]{\node[first_col] {};})$?};
        \node[es, right = 0.55cm of BICL-test] (sortie) {Renvoyer \tikz{\node[rectangle, draw, fill=gray!50, rounded corners=0pt] {};}};
        \node[es, left = 0.45cm of dissimi, text width = 2cm] (recursion) {Recommencer sur \tikz{\node[second_col] {1};} et \tikz{\node[second_col] {2};} };

        % Arrows: main flow downwards, "Oui" loops back through the recursion box, "Non" exits
        \draw[suite] (liste) -- (1-collection);
        \draw[suite] (1-collection) -- (dissimi);
        \draw[suite] (dissimi) -- (2-sous-collection);
        \draw[suite] (2-sous-collection) -- (BICL-test);
        \draw[suite] (BICL-test) -| node[near start, above, fill=none] {Oui} (recursion);
        \draw[suite] (recursion.north) |- (1-collection.west);
        \draw[suite] (BICL-test) -- node[near start, above, fill=none] {Non} (sortie);

    \end{tikzpicture}
    \blfootnote{Même approche que~\cite{chabert-liddellLearningCommonStructures2023}}
\end{frame}

% NOTE(review): no frame follows this section before the next \section,
% so it is currently empty — confirm whether frames are missing or the section should go.
\section{Application}
\label{sec:application}

\section{Conclusion}
\label{sec:conclusion}
% Frame: conclusion, perspectives, link to the package and closing thanks.
\begin{frame}
    \frametitle{Conclusion et perspectives}
    % DONE: add a conclusion/perspectives slide
    % Recall the models with clustering
    % Mention the analysis of networks corrected for sampling
    % Link to the package

    \begin{itemize}
        \item 4 modèles dont 3 qui ont une flexibilité sur au moins une des dimensions (adaptabilité aux données)
        \item Partitionner un ensemble de réseaux selon leurs structures
        % No leading ~ inside the footnote: it would print a space before the citation
        \item Comparer les \emph{clusterings} de réseaux obtenus entre données brutes et données corrigées (par exemple par la méthode \emph{CoOPLBM}\footnote{\cite{anakokDisentanglingStructureEcological2022}})
    \end{itemize}

    \bigskip
    \centering
    % Spaces after the control word \faGithub are ignored by TeX, so an explicit
    % non-breaking space separates the icon from the URL.
    Le package est disponible sur GitHub : \faGithub~\url{https://github.com/Chabert-Liddell/colSBM}

    \bigskip
    \huge
    Merci pour votre attention !

\end{frame}
% Redefines \pgfuseimage for the rest of the document: its argument is passed
% to \includegraphics and the result is scaled to 75%.
% NOTE(review): presumably meant to shrink the icons printed in the bibliography frame below — confirm.
\renewcommand{\pgfuseimage}[1]{\scalebox{.75}{\includegraphics{#1}}}
% Bibliography: plain layout, excluded from the frame numbering,
% allowed to continue over several slides.
\begin{frame}[plain,noframenumbering,allowframebreaks]{Bibliographie}
    \printbibliography
\end{frame}

\end{document}