diff --git a/principal.tex b/principal.tex index c1ffb78..d486b26 100644 --- a/principal.tex +++ b/principal.tex @@ -222,6 +222,10 @@ M}, \alpha)$. \end{block} } + \begin{itemize} + \item No shared nodes across networks + \item Agnostic of structure + \end{itemize} \end{frame} % \begin{frame} % \frametitle{Parameter estimation} @@ -269,12 +273,12 @@ \end{align*} \onslide<3>{ - We would like to use Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is untractable due to dependence between rows and columns.} + We would like to use the Expectation-Maximization (EM) algorithm~\parencite{dempsterMaximumLikelihoodIncomplete1977} but the law of $\mathbf{Z,W|Y},\theta^{(t-1)}$ is intractable due to dependence between row and column groups.} \end{frame} \begin{frame}{Parameter estimation}{Solution} By \emph{Variational EM}, as proposed - by~\cite{daudinMixtureModelRandom2008, - chabert-liddellLearningCommonStructures2024}. + by~\cite{daudinMixtureModelRandom2008} and adapted for joint simple networks + by~\cite{chabert-liddellLearningCommonStructures2024}. \begin{block}{Variational approximation of $\bm{Z,W|Y},\theta^{(t-1)}$} $\mathcal{R}_{Y^m,\tau}(Z^m, W^m) = \mathcal{R}^1_{Y^m,\tau}(Z^m) @@ -294,15 +298,15 @@ \end{frame} \begin{frame}{Selection criterion for $Q_1, Q_2$} - \cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL). + \cite{biernackiAssessingMixtureModel2000} introduced the Integrated Classification Likelihood (ICL): \begin{align*} \text{ICL}(\bm{Y}, Q_1, Q_2) & = \mathbb{E} [\ell_c(\bm{Y,Z,W};\hat{\theta})] -\frac{1}{2}\text{pen}(Q_1, Q_2) \\ & = \ell(\mathbf{Y};\hat{\theta}) - \mathcal{H}(p(\mathbf{Z,W}|\mathbf{Y},\hat{\theta})) - \frac{1}{2}\text{pen}(Q_1, Q_2) - \end{align*} leads to low entropy clustering. + \end{align*} leads to low entropy clustering. Common in the SBM literature. 
\onslide<2->{ \begin{align*} - \text{BIC-L}(\bm{Y}, Q_1, Q_2) & = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H(\mathcal{R}_{\mathbf{Y},\hat{\tau}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ - & = \mathcal{J(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}})} - \frac{1}{2}\text{pen}(Q_1, Q_2) + \text{BIC-L}(\bm{Y}, & Q_1, Q_2) = \mathbb{E}_{\mathcal{R}_{\mathbf{Y},\hat{\tau}}} [\ell_c(\bm{Y,Z,W};\hat{\theta}^{\text{var}})] + \mathcal{H}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \\ + & = \mathcal{J}(\mathcal{R}_{\mathbf{Y},\hat{\tau}}, \hat{\theta}^{\text{var}}) - \frac{1}{2}\text{pen}(Q_1, Q_2) \textcolor{red}{\leq \log p(\mathbf{Y};\hat{\theta}^{\text{MV}})- \frac{1}{2}\text{pen}(Q_1, Q_2)} \end{align*} because we want fuzzier clustering. }