Fin ajout retours Sophie

This commit is contained in:
Louis 2025-05-28 11:27:22 +02:00
parent 82929451b3
commit 708e20587b

View file

@ -222,6 +222,10 @@
M}, \alpha)$.
\end{block}
}
\begin{itemize}
\item No shared nodes across networks
\item Agnostic of structure
\end{itemize}
\end{frame}
% \begin{frame}
% \frametitle{Parameter estimation}
@ -269,12 +273,12 @@
\end{align*}
\onslide<3>{
We would like to use the Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to dependence between rows and columns.}
We would like to use the Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to dependence between row and column groups.}
\end{frame}
\begin{frame}{Parameter estimation}{Solution}
By \emph{Variational EM}, as proposed
by~\cite{daudinMixtureModelRandom2008,
chabert-liddellLearningCommonStructures2024}.
by~\cite{daudinMixtureModelRandom2008} and adapted for joint simple networks
by~\cite{chabert-liddellLearningCommonStructures2024}.
\begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$}
$\mathcal{R}_{Y^m,\tau}(Z^m, W^m) =
\mathcal{R}^1_{Y^m,\tau}(Z^m)
@ -294,15 +298,15 @@
\end{frame}
\begin{frame}{Selection criterion for $Q_1, Q_2$}
\cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL).
\cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL):
\begin{align*}
\text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2)
\end{align*} leads to low entropy clustering.
\end{align*} leads to low-entropy clustering. Common in the literature for SBM.
\onslide<2->{
\begin{align*}
\text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2)
\text{BIC-L}(\bm{Y}, & Q_1, Q_2) = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\
& = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \textcolor{red}{\leq \log p(\mathbf{Y};\hat{\theta}^{\text{MV}})- \frac{1}{2}\text{pen}(Q_1, Q_2)}
\end{align*}
because we want fuzzier clustering.
}